Basic Knowledge


Workspace 工作空间 & Environment 工作环境

# Working directory, environment listing, and saving/restoring objects as R code.
getwd()                               # show the current working directory
setwd("./data")                       # change the working directory
ls()                                  # list the objects in the current environment
x <- "foo"                            # defined here so dump() and rm() below have an x to act on
y <- data.frame(a = 1, b = "a")
dput(y, file = "y.R")                 # write one object to a file as R code
dump(c("x", "y"), file = "data.R")    # write several objects, given by name, as R code
new.y <- dget("y.R")                  # read the dput() file back; the name must match its case ("y.R")
rm(x, y)                              # remove variables from the environment
source("mycode.R")                    # run an R file, e.g. to load the functions it defines

Time

Input time

# A Date is stored as the number of days since 1970-01-01, so this prints 0
x <- as.Date("1970-01-01")
unclass(x)

Get time and part of time 获取当前时间提取部分时间

# Current time in the three representations used by base R
x <- Sys.time()        # classes POSIXct, POSIXt
y <- date()            # plain character string
p <- as.POSIXlt(x)     # classes POSIXlt, POSIXt: a list of time components
names(unclass(p))      # names of the available components
p$sec                  # seconds component

Change string to Date 将文字改为时间格式

# Parse a character string into a date-time (POSIXlt) using an explicit format
datestring <- "2012-11-16 10:40:00"
x <- strptime(datestring, format = "%Y-%m-%d %H:%M:%S")

Time difference 算时差

# Date minus date-time: convert both sides to the same class before subtracting
x <- as.Date("2012-01-01")
# %B matches a full month name such as "April" (in an English locale);
# %m only matches a month number, so using it here would give NA
y <- strptime("2011-April-9 11:34:21", "%Y-%B-%d %H:%M:%S")
x <- as.POSIXlt(x)
x - y # roughly 266.5 days; the exact value depends on the local time zone

# Date minus Date: the leap day 2012-02-29 lies between the two dates
x <- as.Date("2012-03-01")
y <- as.Date("2012-02-28")
x - y # Time difference of 2 days

# Different time zones: x is read in the local zone, y in GMT
x <- as.POSIXct("2012-10-25 01:00:00")
y <- as.POSIXct("2012-10-25 06:00:00", tz = "GMT")
y - x # 1 hour when the local zone is 4 hours behind GMT; differs elsewhere

Running Time 算运行时间

system.time(read.table("a.txt",header = TRUE,sep = "\t"))

Basic Function

if-else

# if-else used as an expression: prints (and returns) 10 when x > 3, otherwise 0
function1 <- function(x) {
  y <- if (x > 3) 10 else 0
  print(y)
}

for

# Four ways of writing a for loop: fixed index range, seq_along(), iterating
# over the elements directly, and nested loops over a matrix.
function2 <- function() {
  x <- c("a", 2, "c", "d")   # c() coerces the number 2 to the string "2"
  for (idx in 1:4) {
    print(x[idx])
  }
  for (idx in seq_along(x)) {
    print(x[idx])
  }
  for (letter in x) {
    print(letter)
  }
  y <- matrix(1:6, 2, 3)
  for (r in seq_len(nrow(y))) {     # nested loops: rows outside, columns inside
    for (k in seq_len(ncol(y))) {
      print(y[r, k])
    }
  }
}

while

# while loops: a simple counter, then a random walk that starts at 5 and stops
# as soon as it leaves the range [3, 10].
function3 <- function() {
  count <- 0
  while (count < 10) {
    print(count)
    count <- count + 1
  }
  z <- 5
  while (z >= 3 && z <= 10) {
    print(z)
    coin <- rbinom(1, 1, 0.5)   # fair coin: 1 moves up, 0 moves down
    z <- if (coin == 1) z + 1 else z - 1
  }
}

repeat

# repeat loop: keep doubling x0 until two successive values differ by more
# than 100, then leave the loop with break.
function4 <- function() {
  x0 <- 1
  repeat {
    x1 <- 2 * x0
    if (abs(x1 - x0) > 100) {
      break
    }
    x0 <- x1
  }
}

lapply

# lapply applies a function to each element and always returns a list
x <- 1:4
lapply(x, runif)                      # runif(n) draws n uniform random numbers
x <- 1:4
lapply(x, runif, min = 0, max = 10)   # extra arguments are passed on to runif
x <- list(a = matrix(1:4, 2, 2), b = matrix(1:6, 3, 2))
lapply(x, function(m) m[, 1])         # anonymous function: first column of each matrix

apply 循环语句 (可选择维度dimension)

# apply runs a function over a margin of an array: 1 = rows, 2 = columns
x <- matrix(1:8, nrow = 4, ncol = 2)
rowSums <- apply(x, 1, sum)
rowMeans <- apply(x, 1, mean)
colSums <- apply(x, 2, sum)
colMeans <- apply(x, 2, mean)
x <- matrix(rnorm(200), nrow = 20, ncol = 10)
apply(x, 1, quantile, probs = c(0.25, 0.75))   # extra arguments go to quantile
a <- array(rnorm(2 * 2 * 10), dim = c(2, 2, 10))
apply(a, c(1, 2), mean)                        # 3-d array: average over the third dimension

mapply 循环语句 (function + parameter)

mapply(rep, 1:4, 4:1)
list(rep(1,4),rep(2,3),rep(3,2),rep(4,1))

tapply 循环语句 (加入factor)

# tapply applies a function to the subsets of a vector defined by a factor
x <- c(rnorm(10), runif(10), rnorm(10, mean = 1))
f <- gl(3, 10)                          # factor with 3 levels, each repeated 10 times
tapply(x, f, mean)                      # group means, simplified to a vector
tapply(x, f, mean, simplify = FALSE)    # same values, kept as a list
tapply(x, f, range)                     # two values per group, so a list is returned

Debug 报错程序

# Print a message describing a single value x (missing, positive, or not
# positive) and return x invisibly.
printmessage <- function(x) {
  msg <- if (is.na(x)) {
    "x is a missing value"
  } else if (x > 0) {
    "x is greater than zero"
  } else {
    "x is less than or equal to zero"
  }
  print(msg)
  invisible(x)
}
printmessage(1)
printmessage(NA)
printmessage(log(-1))   # log(-1) is NaN (with a warning); is.na(NaN) is TRUE

Vectorized Operations 向量运算和矩阵运算

# Arithmetic and comparison operators work element by element on vectors
x <- 1:4; y <- 6:9
# separate top-level expressions with ";" (a comma here is a syntax error)
x + y; x > 2; x >= 2; x * y; x / y
# The same operators are element-wise on matrices; %*% is the matrix product
x <- matrix(1:4, 2, 2); y <- matrix(rep(10, 4), 2, 2)
x * y        # element-wise multiplication
x %*% y      # true matrix multiplication
x / y        # element-wise division

Lexical Scoping 词法域

function in function 函数套函数 变量的定义 提取环境变量参数

# make.power() is a function factory: it returns a new function that raises
# its argument to the power n.
make.power<-function(n){
  # pow is defined inside make.power, so the free variable n in its body is
  # looked up in the environment of this call to make.power
  pow<-function(x){x^n}; pow}
cube<-make.power(3)
cube
## function(x){x^n}
## <environment: 0x00000000083e0fa8>
cube(4)
## [1] 64
ls(environment(cube)) # names of the objects in the environment where cube was defined
## [1] "n"   "pow"
get("n",environment(cube)) # value of n stored in that environment
## [1] 3

different from Dynamic Scoping 环境变量作用范围

y <- 10
# f assigns its own local y, but g is defined at top level, so the free
# variable y inside g resolves to the global y (10), not to f's local y (2).
f <- function(x) {
  y <- 2
  y^2 + g(x)
}
g <- function(x) {
  x * y
}
f(3) # 2^2 + 3 * 10 = 34

Function

Function

# Mean of 100 standard-normal draws
myfunction <- function() {
  mean(rnorm(100))
}
# Add standard-normal noise to every element of x
second <- function(x) {
  noise <- rnorm(length(x))
  x + noise
}

Several input parameters and several output 自定义函数多个输入多个输出

# Several inputs, several outputs: the two results a + b + x and c + b + x
# are returned together as an unnamed list of length two.
MultiFun <- function(x, a = 0, b = 100, c = 2) {
  list(a + b + x, c + b + x)
}

Random sampling 随机生成数

The Normal Distribution

set.seed(252)
rnorm(5)

The Binomial Distribution

rbinom(100,1,0.5)

The Poisson Distribution

rpois(100, 5)

Random Samples

sample(1:10,15,replace = TRUE)
sample(1:10,4)
sample(letters,4)

Random Permutations 随机排列

sample(1:10)
sample(1:10, replace = TRUE)

Creating sequences 生成序列

# Three ways to build a sequence
s1 <- seq(1, 10, by = 2)          # fixed step
s1
s2 <- seq(1, 10, length.out = 3)  # fixed number of points (full argument name, not "length")
s2
x <- c(1, 3, 8, 25, 100)
seq_along(x)                      # one index per element of x

Common transforms 常用数字处理

x <- 3.475
abs(x) # absolute 绝对值
sqrt(x) # square root 开方
ceiling(x) # ceiling(3.475)is 4 取上界
floor(x) # floor(3.475) is 3  取下界
round(x, digits = 2) # (3.475,digits=2) is 3.48
signif(x, digits = 2) # (3.475,digits=2) is 3.5

Getting and Cleaning Data 获取数据 清理数据


Getting Data 获取数据

Downloading Files 下载文件

getwd()
# Make sure the download directory exists BEFORE using it. The working
# directory is left unchanged so that the "./data/..." paths below resolve.
# The second test avoids calling dir.create() when a regular file named
# "data" is in the way.
if (!dir.exists("data") && !file.exists("data")) {
  dir.create("data")
}
fileUrl <- "https://data.baltimorecity.gov/api/views/dz54-2aru/rows.csv?accessType=DOWNLOAD"
download.file(fileUrl, destfile = "./data/cameras.csv")
list.files("./data")

Reading Local Files csv

# read.table() defaults to whitespace-separated fields and header = FALSE, so
# this first call is not suited to a comma-separated file with a header row
cameraData <- read.table("./data/cameras.csv")
# spell out the separator and the header row for a CSV file
cameraData <- read.table("./data/cameras.csv",sep = ",", header = TRUE)
head(cameraData)
cameraData <- read.csv("./data/cameras.csv") # read.csv defaults to sep = ",", header = TRUE
head(cameraData)

Write a csv file

write.csv(c(1,1,1,1,1),"AA.csv")

PS: Write a txt file

big_df <- data.frame(x=rnorm(1E6),y=rnorm(1E6))
write.table(big_df, file= "a.txt",row.names = FALSE, col.names = TRUE, sep = "\t",quote = FALSE)

Reading XML Files

library(XML)
fileUrl <- "http://www.w3schools.com/xml/simple.xml"
# useInternalNodes is spelled out in full instead of relying on partial
# argument matching of "useInternal"
doc <- xmlTreeParse(fileUrl, useInternalNodes = TRUE)
rootNode <- xmlRoot(doc)            # top-level node of the document
xmlName(rootNode)
names(rootNode)
rootNode[[1]]                       # first child of the root
rootNode[[1]][[1]]                  # first child of that child
xmlSApply(rootNode, xmlValue)       # text content of every child of the root
# the XPath queries below need the document parsed with useInternalNodes = TRUE
xpathSApply(rootNode, "//name", xmlValue)
xpathSApply(rootNode, "//price", xmlValue)

Reading JSON Files

library(jsonlite)
jsonData <- fromJSON("https://api.github.com/users/jtleek/repos")
names(jsonData)
names(jsonData$owner)
jsonData$owner$login
myjson <-toJSON(iris, pretty = TRUE)
cat(myjson)
iris2 <- fromJSON(myjson)
head(iris2)

Reading from MySQL

Connecting and listing databases

library(DBI)
library(RMySQL)
ucscDb <- dbConnect(MySQL(),user = "genome", host = "genome-mysql.cse.ucsc.edu")
result <- dbGetQuery(ucscDb,"show databases;");
dbDisconnect(ucscDb)

Connecting to hg19 and listing tables

hg19<- dbConnect(MySQL(),user = "genome",db = "hg19", host = "genome-mysql.cse.ucsc.edu")
allTables<-dbListTables(hg19)
length(allTables)

Get dimensions of a specific table

dbListFields(hg19,"affyU133Plus2")
dbGetQuery(hg19,"select count(*) from affyU133Plus2")
affyData<-dbReadTable(hg19,"affyU133Plus2")
head(affyData)

Select a specific subset

query<-dbSendQuery(hg19,"select*from affyU133Plus2 where misMatches between 1 and 3")
affyMis<-fetch(query);quantile(affyMis$misMatches)
affyMisSmall<-fetch(query,n=10);dbClearResult(query);
dim(affyMisSmall)
dbDisconnect(hg19)

Reading from HDF5

Create a HDF5 file

source("http://bioconductor.org/biocLite.R")
library(BiocInstaller)
biocLite("rhdf5")
library(rhdf5)
created = h5createFile("example.h5")
created = h5createGroup("example.h5","foo")
created = h5createGroup("example.h5","baa")
created = h5createGroup("example.h5","foo/foobaa")
h5ls("example.h5")

Write a HDF5 file

A = matrix(1:10,5,2)
h5write(A, "example.h5","foo/A")
B = array(seq(0.1,2.0,by=0.1),dim=c(5,2,2))
attr(B,"scale") <- "liter"
h5write(B,"example.h5","foo/foobaa/B")
h5ls("example.h5")
df = data.frame(1L:5L,seq(0,1,length.out = 5),
                c("ab","cde","fghi","a","s"), stringsAsFactors = FALSE)
h5write(df, "example.h5", "df")
h5ls("example.h5")

Read from a HDF5 file

readA = h5read("example.h5","foo/A")
readB = h5read("example.h5","foo/foobaa/B")
readdf = h5read("example.h5","df")
h5write(c(12,13,14),"example.h5","foo/A",index = list(1:3,1))
h5read("example.h5","foo/A")

Reading Excel Files

library(rJava)
library(xlsxjars)
library(xlsx)
Pro426<- read.xlsx("z.xlsx",sheetIndex = 1, colIndex = 1:3, rowIndex = 1:4)

Write a Excel file

write.xlsx(DF,"df.xlsx")

Reading data from the web

# Option 1: read the raw HTML lines through a connection
con <- url("http://scholar.google.com/citations?user=HI-I6C0AAAAJ&hl=en")
htmlCode <- readLines(con)
close(con)   # always close the connection once the lines are read
htmlCode

# Option 2: parse the page with the XML package and query it with XPath
library(XML)
url <- "http://scholar.google.com/citations?user=HI-I6C0AAAAJ&hl=en"
html <- htmlTreeParse(url, useInternalNodes = TRUE)   # TRUE rather than T: T can be reassigned
xpathSApply(html, "//title", xmlValue)
xpathSApply(html, "//td", xmlValue)

# Option 3: fetch with httr, then parse the returned text
library(httr)
html2 <- GET(url)
content2 <- content(html2, as = "text")
parsedHtml <- htmlParse(content2, asText = TRUE)
xpathSApply(parsedHtml, "//title", xmlValue)

# Basic authentication: the first request carries no credentials, the second does
pg1 <- GET("http://httpbin.org/basic-auth/user/passwd"); pg1
pg2 <- GET("http://httpbin.org/basic-auth/user/passwd",
           authenticate("user", "passwd")); pg2; names(pg2)

# Reuse one handle for several requests to the same site
google <- handle("http://google.com")
pg1 <- GET(handle = google, path = "/"); pg1
pg2 <- GET(handle = google, path = "search"); pg2

Data generation and processing by data.table 快速构建处理数据

Generate and get files used by data.table

library(data.table)
DF = data.frame(x = rnorm(9),y = rep(c("a","b","c"),3),z=rnorm(9));DF
DF = data.frame(x = rnorm(9),y = rep(c("a","b","c"),each=3),z=rnorm(9));DF;class(DF)
DT = data.table(x = rnorm(9),y = rep(c("a","b","c"),each=3),z=rnorm(9));DT;class(DT)
tables()

subset 快速截取子集

DT[2,]; DT[DT$y=="a",]; DT[c(2,3)]

data processing 按列快速处理

# The second argument of DT[ , ] can be any expression of the columns;
# := adds a column to DT itself (by reference) rather than to a copy
DT[,list(mean(x),sum(z))]; DT[,table(y)]; x<-DT[,w:=z^2] # column summaries; table of y; new column w = z^2
y<-DT[,m:={tmp <- (x+z); log2(tmp+5)}] # multi-statement expression: its last value becomes column m
y<-DT[,a:=x>0] # logical column a
y<-DT[,b:=mean(x+w),by=a] # mean of x + w computed within each value of a

factor 分类处理

set.seed(123)
DT <- data.table(x=sample(letters[1:3], 1E5, TRUE))
DT[, .N,by=x] # way 1
DT <- data.table(x=rep(c("a","b","c"),each=100),y=rnorm(300))
setkey(DT,x) # way 2
DT['a']
DT1 <- data.table(x=c('a','a','b','dt1'),y=1:4)
DT2 <- data.table(x=c('a','b','dt2'),z=5:7)
setkey(DT1,x); setkey(DT2,x)
merge(DT1,DT2) # way 3 - merge by key

DT2 change following DT !!!

DT2<-DT # plain assignment does not copy a data.table: DT2 and DT refer to the same table
DT[,y:="2"] # := changes the table in place, so the change is visible through DT2 as well
DT[DT$y==2,] 
head(DT,n=3); head(DT2,n=3) # compare the two; use copy(DT) when an independent copy is needed

Subsetting and sorting

Define a matrix 定义矩阵

x<-matrix(c(2,3,4,5,6,7,8,NA),2,4) # 2 是行,4 是列

Binding 合并矩阵

x<-matrix(1:6,2,3)
y<-matrix(7:12,2,3)
a<-rbind(x,y) # 行合并
b<-cbind(x,y) # 列合并

Subsetting - Basics 提取Vector里的一组或者一个元素

x<- c("a","b","c","d","c","b","a")
x[1] # 选单个
x[1:4] # 选一串
x[x>"a"]; u<-x>"a"; x[u] # 筛选

Subsetting - Lists 提取list里的一组或一个元素

x<-list(foo=1:4,bar=0.6,baz="Hello")
x[1] # first element, returned as a list (name kept)
x[[1]] # first element itself: integer vector
x$foo # same element selected by name: integer vector
x$bar # second element: numeric
x["bar"] # second element, returned as a list (name kept)
x[["bar"]] # second element itself: numeric
x[c(1,3)] # first and third elements, returned as a list
name<-"foo"; x[[name]] # [[ ]] evaluates the variable name: integer vector
x$name # NULL: $ looks for an element literally called "name"
y <- list(a = list(10,12,14), b = c(3.14, 2.81))
y[[c(1,3)]] # third item of the first element
y[[1]][[3]] # third item of the first element
y[[c(2,1)]] # first item of the second element
y[[2]][[2]] # second item of the second element

Subsetting - Matrix 提取矩阵里的一列或几列或一个元素

x <- matrix(1:6,2,3)
x[1,2] # single element: integer
x[1,2,drop = FALSE] # same element kept as a 1 x 1 matrix
x[1,] # first row: integer vector
x[1,,drop = FALSE] # first row kept as a 1 x 3 matrix
x[1,2:3] # integer vector
x[,2:3] # matrix
x[-1,] # every row except the first

Subsetting - data.frame 提取数据框架里的一列或几列或一个元素

set.seed(12345)
x <- data.frame("var1" = sample(1:5), "var2" = sample(6:10), "var3" = sample(11:15))
x <- x[sample(1:5),] # shuffle the rows
x$var2[c(1,3)] = NA # overwrite two values with NA
x[,1] # first column: integer vector
x[1:2,"var1"] # first two values of var1: integer vector
x[(x$var1<=3 & x$var3 >11),] # rows where both conditions hold
x[x$var1<=3 | x$var3 >15,] # rows where at least one condition holds
x[which(x$var2>6),] # which() keeps only the TRUE positions, so rows with NA in var2 are dropped

Sorting - data.frame 显示某一组元素大小的排序

sort(x$var1)                     # ascending
sort(x$var1, decreasing = TRUE)  # descending
sort(x$var2, na.last = TRUE)     # keep the NAs and put them last (TRUE, not T: T can be reassigned)
sort(x$var2)                     # default na.last = NA removes the NAs
x$var2                           # the column itself is unchanged

Ordering - data.frame 显示对某一组元素排序后对象的顺序

order(x$var2)
order(x$var2, na.last = FALSE)
x[order(x$var1),]
x[order(x$var1,x$var3),]

Ordering - data.frame by plyr 显示按某一组元素排序后整个frame结果

library(plyr)
arrange(x,var1)
arrange(x,desc(var1))
desc(x$var1) # 这个是取负

Adding rows and columns 添加行或列

x$var4<-rnorm(5)
y <- cbind(x,rnorm(5))

Partial Matching 元素名部分符合

Partial matching of names is allowed with [[ ]] (using exact = FALSE) and $

x <- list(aafewfw = 1:5)
x$a # $ partially matches "a" to aafewfw: 1 2 3 4 5
x[["a"]] # NULL: [[ ]] matches names exactly by default
x[["a", exact = FALSE]] # partial matching switched on: 1 2 3 4 5
x$fw # NULL: only the beginning of a name can be matched
x <- list(aafewfw = 1:5, afewaf = 6:12)
x$a # NULL: "a" is ambiguous between the two names
x[["a"]] # NULL
x[["a", exact = FALSE]] # NULL: still ambiguous
x$aa # unambiguous prefix of aafewfw: 1 2 3 4 5

Removing NA Values 去除 NA

*Using is.na

x <- c(1,2,NA,4,NA,5)
bad <- is.na(x)
x[!bad]

*Using complete.cases 可以对不同元素对应同一个sample考虑,即选取有用sample

x <- c(1,2,NA,4,NA,5)
y <- c("a","b",NA,"d","f",NA)
good <-complete.cases(x,y)
x[good]; y[good]
x<-matrix(c(1,2,3,4,5,6,7,8,NA,10,NA,12),3,4)
good<-complete.cases(x)
x[good] # numeric
x[good,] # numeric
x[good,,drop=FALSE] # matrix

Generate factors 手动生成一组分类

x <-c(rnorm(10),runif(10),rnorm(10,1))
f<-gl(3,10) # Generate factors by specifying the pattern of their levels
tapply(x,f,mean)

Summarizing Data 概述数据

Getting the data from the web

if(!file.exists("./data")){dir.create("./data")}
fileUrl <- "https://data.baltimorecity.gov/api/views/k5ry-ef3g/rows.csv?accessType=DOWNLOAD"
download.file(fileUrl,destfile = "./data/restaurants.csv")
restData <- read.csv("./data/restaurants.csv")

Look at a bit of the data

head(restData,n=3)
tail(restData, n=3)
summary(restData)

More in depth info

str(restData)

Quantiles of quantitative variables

# quantile() stops with an error on missing values unless na.rm = TRUE, so
# both calls drop them
quantile(restData$councilDistrict, na.rm = TRUE)                              # default: 0, 25, 50, 75, 100 %
quantile(restData$councilDistrict, probs = c(0.5, 0.75, 0.9), na.rm = TRUE)   # chosen probabilities

Make table for some attribute

table(restData$zipCode, useNA= "ifany") # useNA is important
table(restData$councilDistrict, restData$zipCode)

Check for missing values

sum(is.na(restData$councilDistrict))
any(is.na(restData$councilDistrict))

Row and column sums

all(restData$zipCode>0)
colSums(is.na(restData))
all(colSums(is.na(restData))==0)

Values with specific characteristic

table(restData$zipCode %in% c("21212"))
table(restData$zipCode %in% c("21212","21213"))
restData[restData$zipCode %in% c("21212","21213"),]

Cross tabs

data(UCBAdmissions)
DF = as.data.frame(UCBAdmissions)
summary(DF)
xt <- xtabs(Freq ~ Gender + Admit,data=DF);xt

Flat tables

# Add a replicate index 1..9, recycled over the 54 rows (length.out is the
# full argument name; "len" relied on partial matching)
warpbreaks$replicate <- rep(1:9, length.out = 54)
# Cross-tabulate breaks by all the other variables; with three factors the
# result prints as one two-way table per replicate
xt <- xtabs(breaks ~ ., data = warpbreaks)
xt
ftable(xt)   # the same counts laid out as a single flat table

Size of a data set

fakeData <- rnorm(1e5)
object.size(fakeData)
print(object.size(fakeData),units="Mb")

Creating Variables (Feature Construction) 创建新变量(特征构造)

Creating sequences 生成序列

# Three ways to build a sequence
s1 <- seq(1, 10, by = 2)          # fixed step
s1
s2 <- seq(1, 10, length.out = 3)  # fixed number of points (full argument name, not "length")
s2
x <- c(1, 3, 8, 25, 100)
seq_along(x)                      # one index per element of x

Subsetting variables 关键词筛选样本 截取子集

restData$nearMe = restData$neighborhood %in% c("Roland Park", "Homeland")
table(restData$nearMe)

Creating binary variables 按条件创建是非变量

restData$zipWrong <- ifelse(restData$zipCode <0 , yes = TRUE, no =FALSE)
restData$zipWrong <- ifelse(restData$zipCode <0 , yes = 1, no =2)
table(restData$zipWrong, restData$zipCode < 0)

Creating categorical variables 按序分组

# Cut the zip codes into four groups at their quartiles. cut() builds
# intervals that are open on the left, so without include.lowest = TRUE the
# smallest zip code would fall outside the first interval and become NA.
restData$zipGroups <- cut(restData$zipCode,
                          breaks = quantile(restData$zipCode),
                          include.lowest = TRUE)
table(restData$zipGroups)

Easier cutting 快速剪切 快速分组

library(grid); library(lattice); library(survival); library(Formula); library(ggplot2); library(Hmisc)
restData$zipGroups = cut2(restData$zipCode,g= 4)
table(restData$zipGroups)

Creating factor variables 创建因素

restData$zcf <- factor(restData$zipCode)
restData$zcf[1:10]
class(restData$zcf)

Levels of factor variables 创建因素等级

yesno <- sample(c("yes","no"),size = 10, replace = TRUE)
yesnofac = factor(yesno, levels=c("yes","no"))
relevel(yesnofac, ref="yes")
as.numeric(yesnofac)

Using the mutate function

library(plyr)
library(Hmisc)
restData2 = mutate(restData, zipGroups=cut2(zipCode,g=4))
table(restData2$zipGroups)

Reshape Data 整理数据

Start with reshaping

library(reshape2)
head(mtcars)

Melting data frames

mtcars$carname <- rownames(mtcars)
carMelt <- melt(mtcars,id=c("carname","gear","cyl"),measure.vars=c("mpg","hp"))
head(carMelt,n=3)
tail(carMelt,n=3)

http://www.statmethods.net/management/reshape.html


Casting data frames

cylData <- dcast(carMelt, cyl ~ variable)
cylData
cylData <- dcast(carMelt, cyl ~ variable,mean)
cylData

http://www.statmethods.net/management/reshape.html


Averaging values

head(InsectSprays)
tapply(InsectSprays$count,InsectSprays$spray,sum)

http://www.r-bloggers.com/a-quick-primer-on-split-apply-combine-problems/


Another way - split

spIns =  split(InsectSprays$count,InsectSprays$spray)
spIns

Another way - apply

sprCount = lapply(spIns,sum)
sprCount

Another way - combine

unlist(sprCount)
sapply(spIns,sum)

Another way - plyr package

library(plyr)
ddply(InsectSprays,.(spray),summarize,sum=sum(count))

Creating a new variable

spraySums <- ddply(InsectSprays,.(spray),summarize,sum=ave(count,FUN=sum))
dim(spraySums)
head(spraySums)

More information


Merge Data

Peer review data

if(!file.exists("./data")){dir.create("./data")}
fileUrl1 = "https://dl.dropboxusercontent.com/u/7710864/data/reviews-apr29.csv"
fileUrl2 = "https://dl.dropboxusercontent.com/u/7710864/data/solutions-apr29.csv"
download.file(fileUrl1,destfile="./data/reviews.csv",method="curl")
download.file(fileUrl2,destfile="./data/solutions.csv",method="curl")
reviews = read.csv("./data/reviews.csv"); solutions <- read.csv("./data/solutions.csv")
head(reviews,2)
head(solutions,2)

Merging data - merge()

  • Merges data frames
  • Important parameters: x,y,by,by.x,by.y,all
names(reviews)
names(solutions)

Merging data - merge()

mergedData = merge(reviews,solutions,by.x="solution_id",by.y="id",all=TRUE)
head(mergedData)

Default - merge all common column names

intersect(names(solutions),names(reviews))
mergedData2 = merge(reviews,solutions,all=TRUE)
head(mergedData2)

Using join in the plyr package

Faster, but less full featured - defaults to left join, see help file for more

library(plyr)
df1 = data.frame(id=sample(1:10),x=rnorm(10))
df2 = data.frame(id=sample(1:10),y=rnorm(10))
arrange(join(df1,df2),id)

If you have multiple data frames

df1 = data.frame(id=sample(1:10),x=rnorm(10))
df2 = data.frame(id=sample(1:10),y=rnorm(10))
df3 = data.frame(id=sample(1:10),z=rnorm(10))
dfList = list(df1,df2,df3)
join_all(dfList)

More on merging data


Text Mining

Editing text variables

Example - Baltimore camera data

Fixing character vectors - tolower(), toupper()

if(!file.exists("./data")){dir.create("./data")}
fileUrl <- "https://data.baltimorecity.gov/api/views/dz54-2aru/rows.csv?accessType=DOWNLOAD"
download.file(fileUrl,destfile="./data/cameras.csv")
cameraData <- read.csv("./data/cameras.csv")
names(cameraData)
## [1] "address"      "direction"    "street"       "crossStreet" 
## [5] "intersection" "Location.1"
tolower(names(cameraData))
## [1] "address"      "direction"    "street"       "crossstreet" 
## [5] "intersection" "location.1"

Fixing character vectors - strsplit()

  • Good for automatically splitting variable names
  • Important parameters: x, split
splitNames = strsplit(names(cameraData),"\\.")
splitNames[[5]]
## [1] "intersection"
splitNames[[6]]
## [1] "Location" "1"

Quick aside - lists

mylist <- list(letters = c("A", "b", "c"), numbers = 1:3, matrix(1:25, ncol = 5))
head(mylist)
## $letters
## [1] "A" "b" "c"
## 
## $numbers
## [1] 1 2 3
## 
## [[3]]
##      [,1] [,2] [,3] [,4] [,5]
## [1,]    1    6   11   16   21
## [2,]    2    7   12   17   22
## [3,]    3    8   13   18   23
## [4,]    4    9   14   19   24
## [5,]    5   10   15   20   25

http://www.biostat.jhsph.edu/~ajaffe/lec_winterR/Lecture%203.pdf


Quick aside - lists

mylist[1]
## $letters
## [1] "A" "b" "c"
mylist$letters
## [1] "A" "b" "c"
mylist[[1]]
## [1] "A" "b" "c"

http://www.biostat.jhsph.edu/~ajaffe/lec_winterR/Lecture%203.pdf


Fixing character vectors - sapply()

  • Applies a function to each element in a vector or list
  • Important parameters: X,FUN
splitNames[[6]][1]
## [1] "Location"
# Return the first element of x
firstElement <- function(x) {
  x[1]
}
sapply(splitNames,firstElement)
## [1] "address"      "direction"    "street"       "crossStreet" 
## [5] "intersection" "Location"

Peer review data

fileUrl1 <- "https://dl.dropboxusercontent.com/u/7710864/data/reviews-apr29.csv"
fileUrl2 <- "https://dl.dropboxusercontent.com/u/7710864/data/solutions-apr29.csv"
download.file(fileUrl1,destfile="./data/reviews.csv")
download.file(fileUrl2,destfile="./data/solutions.csv")
reviews <- read.csv("./data/reviews.csv"); solutions <- read.csv("./data/solutions.csv")
head(reviews,2)
##   id solution_id reviewer_id      start       stop time_left accept
## 1  1           3          27 1304095698 1304095758      1754      1
## 2  2           4          22 1304095188 1304095206      2306      1
head(solutions,2)
##   id problem_id subject_id      start       stop time_left answer
## 1  1        156         29 1304095119 1304095169      2343      B
## 2  2        269         25 1304095119 1304095183      2329      C

Fixing character vectors - sub()

  • Important parameters: pattern, replacement, x
names(reviews)
## [1] "id"          "solution_id" "reviewer_id" "start"       "stop"       
## [6] "time_left"   "accept"
# sub() replaces only the first match in each name (stray trailing comma removed)
sub("_", "", names(reviews))
## [1] "id"         "solutionid" "reviewerid" "start"      "stop"      
## [6] "timeleft"   "accept"

Fixing character vectors - gsub()

testName <- "this_is_a_test"
sub("_","",testName)
## [1] "thisis_a_test"
gsub("_","",testName)
## [1] "thisisatest"

Finding values - grep(),grepl()

grep("Alameda",cameraData$intersection)
## [1]  4  5 36
table(grepl("Alameda",cameraData$intersection))
## 
## FALSE  TRUE 
##    77     3
cameraData2 <- cameraData[!grepl("Alameda",cameraData$intersection),]

More on grep()

grep("Alameda",cameraData$intersection,value=TRUE)
## [1] "The Alameda  & 33rd St"   "E 33rd  & The Alameda"   
## [3] "Harford \n & The Alameda"
grep("JeffStreet",cameraData$intersection)
## integer(0)
length(grep("JeffStreet",cameraData$intersection))
## [1] 0

http://www.biostat.jhsph.edu/~ajaffe/lec_winterR/Lecture%203.pdf


More useful string functions

library(stringr)
nchar("Jeffrey Leek")
## [1] 12
substr("Jeffrey Leek",1,7)
## [1] "Jeffrey"
paste("Jeffrey","Leek")
## [1] "Jeffrey Leek"

More useful string functions

paste0("Jeffrey","Leek")
## [1] "JeffreyLeek"
str_trim("Jeff      ")
## [1] "Jeff"

Important points about text in data sets

  • Names of variables should be
  • All lower case when possible
  • Descriptive (Diagnosis versus Dx)
  • Not duplicated
  • Not have underscores or dots or white spaces
  • Variables with character values
  • Should usually be made into factor variables (depends on application)
  • Should be descriptive (use TRUE/FALSE instead of 0/1 and Male/Female versus 0/1 or M/F)

Regular expressions

  • Regular expressions can be thought of as a combination of literals and metacharacters
  • To draw an analogy with natural language, think of literal text forming the words of this language, and the metacharacters defining its grammar
  • Regular expressions have a rich set of metacharacters

Literals

Simplest pattern consists only of literals. The literal “nuclear” would match to the following lines:

Ooh. I just learned that to keep myself alive after a
nuclear blast! All I have to do is milk some rats
then drink the milk. Aweosme. :}

Laozi says nuclear weapons are mas macho

Chaos in a country that has nuclear weapons -- not good.

my nephew is trying to teach me nuclear physics, or
possibly just trying to show me how smart he is
so I’ll be proud of him [which I am].

lol if you ever say "nuclear" people immediately think
DEATH by radiation LOL

Literals

The literal “Obama” would match to the following lines

Politics r dum. Not 2 long ago Clinton was sayin Obama
was crap n now she sez vote 4 him n unite? WTF?
Screw em both + Mcain. Go Ron Paul!

Clinton conceeds to Obama but will her followers listen??

Are we sure Chelsea didn’t vote for Obama?

thinking ... Michelle Obama is terrific!

jetlag..no sleep...early mornig to starbux..Ms. Obama
was moving

Regular Expressions

  • Simplest pattern consists only of literals; a match occurs if the sequence of literals occurs anywhere in the text being tested

  • What if we only want the word “Obama”? or sentences that end in the word “Clinton”, or “clinton” or “clinto”?


Regular Expressions

We need a way to express:

  • whitespace word boundaries
  • sets of literals
  • the beginning and end of a line
  • alternatives (“war” or “peace”)

Metacharacters to the rescue!


Metacharacters

Some metacharacters represent the start of a line

^i think

will match the lines

i think we all rule for participating
i think i have been outed
i think this will be quite fun actually
i think i need to go to work
i think i first saw zombo in 1999.

Metacharacters

$ represents the end of a line

morning$

will match the lines

well they had something this morning
then had to catch a tram home in the morning
dog obedience school in the morning
and yes happy birthday i forgot to say it earlier this morning
I walked in the rain this morning
good morning

Character Classes with []

We can list a set of characters we will accept at a given point in the match

[Bb][Uu][Ss][Hh]

will match the lines

The democrats are playing, "Name the worst thing about Bush!"
I smelled the desert creosote bush, brownies, BBQ chicken
BBQ and bushwalking at Molonglo Gorge
Bush TOLD you that North Korea is part of the Axis of Evil
I’m listening to Bush - Hurricane (Album Version)

Character Classes with []

^[Ii] am

will match

i am so angry at my boyfriend i can’t even bear to
look at him

i am boycotting the apple store

I am twittering from iPhone

I am a very vengeful person when you ruin my sweetheart.

I am so over this. I need food. Mmmm bacon...

Character Classes with []

Similarly, you can specify a range of letters [a-z] or [a-zA-Z]; notice that the order doesn’t matter

^[0-9][a-zA-Z]

will match the lines

7th inning stretch
2nd half soon to begin. OSU did just win something
3am - cant sleep - too hot still.. :(
5ft 7 sent from heaven
1st sign of starvagtion

Character Classes with []

When used at the beginning of a character class, the “^” is also a metacharacter and indicates matching characters NOT in the indicated class

[^?.]$

will match the lines

i like basketballs
6 and 9
dont worry... we all die anyway!
Not in Baghdad
helicopter under water? hmmm

More Metacharacters

“.” is used to refer to any character. So

9.11

will match the lines

its stupid the post 9-11 rules
if any 1 of us did 9/11 we would have been caught in days.
NetBios: scanning ip 203.169.114.66
Front Door 9:11:46 AM
Sings: 0118999881999119725...3 !

More Metacharacters: |

This does not mean “pipe” in the context of regular expressions; instead it translates to “or”; we can use it to combine two expressions, the subexpressions being called alternatives

flood|fire

will match the lines

is firewire like usb on none macs?
the global flood makes sense within the context of the bible
yeah ive had the fire on tonight
... and the floods, hurricanes, killer heatwaves, rednecks, gun nuts, etc.


More Metacharacters: |

We can include any number of alternatives…

flood|earthquake|hurricane|coldfire

will match the lines

Not a whole lot of hurricanes in the Arctic.
We do have earthquakes nearly every day somewhere in our State
hurricanes swirl in the other direction
coldfire is STRAIGHT!
’cause we keep getting earthquakes

More Metacharacters: |

The alternatives can be real expressions and not just literals

^[Gg]ood|[Bb]ad

will match the lines

good to hear some good knews from someone here
Good afternoon fellow american infidels!
good on you-what do you drive?
Katie... guess they had bad experiences...
my middle name is trouble, Miss Bad News

More Metacharacters: ( and )

Subexpressions are often contained in parentheses to constrain the alternatives

^([Gg]ood|[Bb]ad)

will match the lines

bad habbit
bad coordination today
good, becuase there is nothing worse than a man in kinky underwear
Badcop, its because people want to use drugs
Good Monday Holiday
Good riddance to Limey

More Metacharacters: ?

The question mark indicates that the indicated expression is optional

[Gg]eorge( [Ww]\.)? [Bb]ush

will match the lines

i bet i can spell better than you and george bush combined
BBC reported that President George W. Bush claimed God told him to invade I
a bird in the hand is worth two george bushes

One thing to note…

In the following

[Gg]eorge( [Ww]\.)? [Bb]ush

we wanted to match a “.” as a literal period; to do that, we had to “escape” the metacharacter, preceding it with a backslash. In general, we have to do this for any metacharacter we want to include in our match


More metacharacters: * and +

The * and + signs are metacharacters used to indicate repetition; * means “any number, including none, of the item” and + means “at least one of the item”

\(.*\)

will match the lines

anyone wanna chat? (24, m, germany)
hello, 20.m here... ( east area + drives + webcam )
(he means older men)
()

More metacharacters: * and +

The * and + signs are metacharacters used to indicate repetition; * means “any number, including none, of the item” and + means “at least one of the item”

[0-9]+ (.*)[0-9]+

will match the lines

working as MP here 720 MP battallion, 42nd birgade
so say 2 or 3 years at colleage and 4 at uni makes us 23 when and if we fin
it went down on several occasions for like, 3 or 4 *days*
Mmmm its time 4 me 2 go 2 bed

More metacharacters: { and }

{ and } are referred to as interval quantifiers; they let us specify the minimum and maximum number of matches of an expression

[Bb]ush( +[^ ]+ +){1,5} debate

will match the lines

Bush has historically won all major debates he’s done.
in my view, Bush doesn’t need these debates..
bush doesn’t need the debates? maybe you are right
That’s what Bush supporters are doing about the debate.
Felix, I don’t disagree that Bush was poorly prepared for the debate.
indeed, but still, Bush should have taken the debate more seriously.
Keep repeating that Bush smirked and scowled during the debate

More metacharacters: { and }

  • {m,n} means at least m but not more than n matches
  • {m} means exactly m matches
  • {m,} means at least m matches

More metacharacters: ( and ) revisited

  • In most implementations of regular expressions, the parentheses not only limit the scope of alternatives divided by a “|”, but also can be used to “remember” text matched by the subexpression enclosed
  • We refer to the matched text with \1, \2, etc.

More metacharacters: ( and ) revisited

So the expression

+([a-zA-Z]+) +\1 +

will match the lines

time for bed, night night twitter!
blah blah blah blah
my tattoo is so so itchy today
i was standing all all alone against the world outside...
hi anybody anybody at home
estudiando css css css css.... que desastritooooo

More metacharacters: ( and ) revisited

The * is “greedy” so it always matches the longest possible string that satisfies the regular expression. So

^s(.*)s

matches

sitting at starbucks
setting up mysql and rails
studying stuff for the exams
spaghetti with marshmallows
stop fighting with crackers
sore shoulders, stupid ergonomics

More metacharacters: ( and ) revisited

The greediness of * can be turned off with the ?, as in

^s(.*?)s$

Summary

  • Regular expressions are used in many different languages; not unique to R.
  • Regular expressions are composed of literals and metacharacters that represent sets or classes of characters/words
  • Text processing via regular expressions is a very powerful way to extract data from “unfriendly” sources (not all data comes as a CSV file)
  • Used with the functions grep,grepl,sub,gsub and others that involve searching for text strings (Thanks to Mark Hansen for some material in this lecture.)

Working with dates

Starting simple

d1 = date()
d1
## [1] "Mon Mar 07 16:59:57 2016"
class(d1)
## [1] "character"

Date class

d2 = Sys.Date()
d2
## [1] "2016-03-07"
class(d2)
## [1] "Date"

Formatting dates

%d = day as number (01-31), %a = abbreviated weekday, %A = unabbreviated weekday, %m = month (01-12), %b = abbreviated month, %B = unabbreviated month, %y = 2 digit year, %Y = four digit year

format(d2,"%a %b %d")
## [1] "Mon Mar 07"

Creating dates

# Parse day / abbreviated-month / four-digit-year strings into Date objects
x <- c("1jan1960", "2jan1960", "31mar1960", "30jul1960")
z <- as.Date(x, format = "%d%b%Y")
z
## [1] "1960-01-01" "1960-01-02" "1960-03-31" "1960-07-30"
# Subtracting two Dates gives a difftime; as.numeric() strips it to a plain number of days
z[1] - z[2]
## Time difference of -1 days
as.numeric(z[1] - z[2])
## [1] -1

Converting to Julian

weekdays(d2)
## [1] "Monday"
months(d2)
## [1] "March"
julian(d2)
## [1] 16867
## attr(,"origin")
## [1] "1970-01-01"

Lubridate

library(lubridate); ymd("20140108")
## [1] "2014-01-08 UTC"
mdy("08/04/2013")
## [1] "2013-08-04 UTC"
dmy("03-04-2013")
## [1] "2013-04-03 UTC"

[http://www.r-statistics.com/2012/03/do-more-with-dates-and-times-in-r-with-lubridate-1-1-0/]


Dealing with times

ymd_hms("2011-08-03 10:15:03")
ymd_hms("2011-08-03 10:15:03",tz="Pacific/Auckland")
?Sys.timezone

[http://www.r-statistics.com/2012/03/do-more-with-dates-and-times-in-r-with-lubridate-1-1-0/]


Some functions have slightly different syntax

# wday() returns the weekday as a number by default, or as an ordered factor with label = TRUE
x <- dmy(c("1jan2013", "2jan2013", "31mar2013", "30jul2013"))
wday(x[1])
## [1] 3
wday(x[1], label = TRUE)
## [1] Tues
## Levels: Sun < Mon < Tues < Wed < Thurs < Fri < Sat

Notes and further resources


Exploratory Analysis 考察分析 – Plotting System


The core plotting and graphics engine in R is encapsulated in the following packages:


Base Graphics

Some Important Base Graphics Parameters

Many base plotting functions share a set of parameters. Here are a few key ones:

  • pch: the plotting symbol (default is open circle)
  • lty: the line type (default is solid line), can be dashed, dotted, etc.
  • lwd: the line width, specified as an integer multiple
  • col: the plotting color, specified as a number, string, or hex code; the colors() function gives you a vector of colors by name
  • xlab: character string for the x-axis label
  • ylab: character string for the y-axis label
  • type: “l” line

Some Important Base Graphics Parameters

The par() function is used to specify global graphics parameters that affect all plots in an R session. These parameters can be overridden when specified as arguments to specific plotting functions.

  • las: the orientation of the axis labels on the plot
  • bg: the background color
  • mar: the margin size
  • oma: the outer margin size (default is 0 for all sides)
  • mfrow: number of plots per row, column (plots are filled row-wise)
  • mfcol: number of plots per row, column (plots are filled column-wise)

Base Plotting Functions

  • plot: make a scatterplot, or other type of plot depending on the class of the object being plotted
  • lines: add lines to a plot, given a vector x values and a corresponding vector of y values (or a 2-column matrix); this function just connects the dots
  • points: add points to a plot
  • text: add text labels to a plot using specified x, y coordinates
  • title: add annotations to x, y axis labels, title, subtitle, outer margin
  • mtext: add arbitrary text to the margins (inner or outer) of the plot
  • axis: adding axis ticks/labels

downloadData

download.file("https://dl.dropboxusercontent.com/u/7710864/data/csv_hid/ss06pid.csv",destfile="ss06pid.csv")
pData <- read.csv("ss06pid.csv")

Boxplots

Important parameters: col, varwidth, names, horizontal

boxplot(pData$AGEP, col="blue")


  • Boxplots - by factors
pData <- transform(pData, DDRS = factor(DDRS))
boxplot(AGEP ~ DDRS, data = pData, col = "blue", xlab = "DDRS", ylab = "AGEP")

boxplot(AGEP ~ DDRS, data = pData, col = c("blue", "orange"), 
        names = c("yes", "no"), varwidth = TRUE)
abline(h=60)


Barplot

barplot(table(pData$CIT), col = "blue",main = "Count Numbers")


Histograms

Important parameters: breaks, freq, col, xlab, ylab, xlim, ylim, main

hist(pData$AGEP, col = "green")
rug(pData$AGEP)
abline(v= 50, lwd = 2)
abline(v = median(pData$AGEP),col = "magenta",lwd = 8)

hist(pData$AGEP, col = "blue", breaks = 100, main = "Age")

## Stack two histograms (2 rows, 1 column) to compare the age distribution of the two groups.
## The second panel previously repeated SEX == "1", so both panels showed the same subset.
## NOTE(review): assumes SEX is coded "1"/"2" in this data set -- confirm against the data dictionary.
par(mfrow = c(2, 1), mar = c(5, 4, 2, 1))
hist(subset(pData, SEX == "1")$AGEP, col = "green")
hist(subset(pData, SEX == "2")$AGEP, col = "green")


Scatterplots

Important parameters: x, y, type, xlab, ylab, xlim, ylim, cex, col, bg

plot(pData$JWMNP, pData$WAGP, pch = 19, col = "blue")

  • Scatterplots - plotting symbol size
plot(pData$JWMNP, pData$WAGP, pch = 19, col = "blue", cex = 0.5)

  • Scatterplots - using color
plot(pData$JWMNP, pData$WAGP, pch = 19, col = pData$SEX, cex = 0.5)

  • Scatterplots - using size

## Scale each point's size by that person's age relative to the maximum age in the data
percentMaxAge <- pData$AGEP / max(pData$AGEP)
plot(pData$JWMNP, pData$WAGP,
     pch = 19, col = "blue", cex = 0.5 * percentMaxAge)

  • Scatterplots - overlaying lines/points
## Base scatterplot, then overlay a vertical grey line at x = 100 and a red diagonal of 100 points
plot(pData$JWMNP, pData$WAGP, pch = 19, col = "blue", cex = 0.5)
lines(rep(100, nrow(pData)), pData$WAGP, col = "grey", lwd = 5)
points(seq(0, 200, length.out = 100), seq(0, 2e+06, length.out = 100),
       col = "red", pch = 19)

  • Scatterplots - numeric variables as factors
library(lattice);library(survival);library(Formula);library(ggplot2);library(Hmisc)
ageGroups <- cut2(pData$AGEP,g=5)
plot(pData$JWMNP,pData$WAGP,pch=19,col=ageGroups,cex=0.5)

  • Scatterplots - by with function
with(pData, plot(JWMNP, WAGP, col = SEX))
title(main = "HAHAHA")
abline(h = 1e+05, lwd = 2, lty = 3)

with(subset(pData, SEX == "1"), plot(JWMNP, WAGP, main = "sex=1"))

  • Scatterplots - by with & points function step by step
library(datasets)
with(airquality, plot(Wind, Ozone, main = "Ozone and Wind in New York City", 
    type = "n"))
with(subset(airquality, Month == 5), points(Wind, Ozone, col = "blue"))
with(subset(airquality, Month != 5), points(Wind, Ozone, col = "red"))
legend("topright", pch = 1, col = c("blue", "red"), legend = c("May", "Other Months"))

  • Scatterplots - (Multiple Base Plots)plot by groups on different pars
## Three panels in one row; oma reserves two lines of outer margin at the top for a shared title
par(mfrow = c(1, 3), mar = c(4, 4, 2, 1), oma = c(0, 0, 2, 0))
with(airquality, {
    plot(Wind, Ozone, main = "Ozone and Wind")
    plot(Solar.R, Ozone, main = "Ozone and Solar Radiation")
    plot(Temp, Ozone, main = "Ozone and Temperature")
    ## outer = TRUE writes the text into the outer margin, above all three panels.
    ## TRUE is spelled out because T is an ordinary variable that can be reassigned.
    mtext("Ozone and Weather in New York City", outer = TRUE)
})


Graphics Devices in R


Graphics File Devices

There are two basic types of file devices: vector and bitmap devices


  • Vector formats
  • pdf: useful for line-type graphics, resizes well, usually portable, not efficient if a plot has many objects/points
  • svg: XML-based scalable vector graphics; supports animation and interactivity, potentially useful for web-based plots
  • win.metafile: Windows metafile format (only on Windows)
  • postscript: older format, also resizes well, usually portable, can be used to create encapsulated postscript files; Windows systems often don’t have a postscript viewer

  • Bitmap formats
  • png: bitmapped format, good for line drawings or images with solid colors, uses lossless compression (like the old GIF format), most web browsers can read this format natively, good for plotting many many many points, does not resize well
  • jpeg: good for photographs or natural scenes, uses lossy compression, good for plotting many many many points, does not resize well, can be read by almost any computer and any web browser, not great for line drawings
  • tiff: Creates bitmap files in the TIFF format; supports lossless compression
  • bmp: a native Windows bitmapped format

File devices example:

    1. Explicitly launch a graphics device
    1. Call a plotting function to make a plot (Note: if you are using a file device, no plot will appear on the screen)
    1. Annotate plot if necessary
    1. Explicitly close graphics device with dev.off() (this is very important!)
pdf(file = "myplot.pdf")  ## Open PDF device; create 'myplot.pdf' in my working directory
## Create plot and send to a file (no plot appears on screen)
with(faithful, plot(eruptions, waiting))  
title(main = "Old Faithful Geyser data")  ## Annotate plot; still nothing on screen
dev.off()  ## Close the PDF file device
## Now you can view the file 'myplot.pdf' on your computer

Copying Plots(save plots)

Copying a plot to another device can be useful because some plots require a lot of code and it can be a pain to type all that in again for a different device.

  • dev.copy: copy a plot from one device to another
  • dev.copy2pdf: specifically copy a plot to a PDF file

NOTE: Copying a plot is not an exact operation, so the result may not be identical to the original.

Copy example:

library(datasets)
with(faithful, plot(eruptions, waiting))  ## Create plot on screen device
title(main = "Old Faithful Geyser data")  ## Add a main title
dev.copy(png, file = "geyserplot.png")  ## Copy my plot to a PNG file
dev.off()  ## Don't forget to close the PNG device!

Lattice Functions

  • xyplot: this is the main function for creating scatterplots
  • bwplot: box-and-whiskers plots (“boxplots”)
  • histogram: histograms
  • stripplot: like a boxplot but with actual points
  • dotplot: plot dots on “violin strings”
  • splom: scatterplot matrix; like pairs in base plotting system
  • levelplot, contourplot: for plotting “image” data

Simple Lattice Plot

library(datasets)
library(lattice)
## Convert 'Month' to a factor variable
airquality <- transform(airquality, Month = factor(Month)) 
xyplot(Ozone ~ Wind | Month, data = airquality, layout = c(5, 1))


Lattice Behavior

p <- xyplot(Ozone ~ Wind, data = airquality)  ## Nothing happens!
print(p)  ## Plot appears

xyplot(Ozone ~ Wind, data = airquality)  ## Auto-printing

Lattice Panel Functions

set.seed(10)
x <- rnorm(100)
f <- rep(0:1, each = 50)
y <- x + f - f * x+ rnorm(100, sd = 0.5)
f <- factor(f, labels = c("Group 1", "Group 2"))
xyplot(y ~ x | f, layout = c(2, 1))  ## Plot with 2 panels

  • Lattice Panel Functions
## Custom panel function
xyplot(y ~ x | f, panel = function(x, y, ...) {
       panel.xyplot(x, y, ...)  ## First call the default panel function for 'xyplot'
       panel.abline(h = median(y), lty = 2)  ## Add a horizontal line at the median
})

  • Lattice Panel Functions: Regression line
## Custom panel function
xyplot(y ~ x | f, panel = function(x, y, ...) {
               panel.xyplot(x, y, ...)  ## First call default panel function
               panel.lmline(x, y, col = 2)  ## Overlay a simple linear regression line
       })

  • Many Panel Lattice Plot
env <- readRDS("maacs_env.RDS")
env <- transform(env, MxNum = factor(MxNum))
xyplot(log2(airmus) ~ VisitNum | MxNum, data = env, strip = FALSE, pch = 20, xlab = "Visit Number", ylab = expression(Log[2] * " Airborne Mouse Allergen"), main = "Mouse Allergen and Asthma Cohort Study (Baltimore City)")


The ggplot2 Plotting System

Basic Components of a ggplot2 Plot

  • A data frame
  • aesthetic mappings: how data are mapped to color, size
  • geoms: geometric objects like points, lines, shapes.
  • facets: for conditional plots.
  • stats: statistical transformations like binning, quantiles, smoothing.
  • scales: what scale an aesthetic map uses (example: male = red, female = blue).
  • coordinate system

The Basics: qplot()

  • Works much like the plot function in base graphics system
  • Looks for data in a data frame, similar to lattice, or in the parent environment
  • Plots are made up of aesthetics (size, shape, color) and geoms (points, lines)
  • Factors are important for indicating subsets of the data (if they are to have different properties); they should be labeled
  • The qplot() hides what goes on underneath, which is okay for most operations
  • ggplot() is the core function and very flexible for doing things qplot() cannot do

Example Dataset

library(ggplot2)
  • ggplot2 Basic
qplot(displ, hwy, data = mpg)

  • Modifying aesthetics (color group/ shape group)
qplot(displ, hwy, data = mpg, color = drv)

qplot(displ, hwy, data = mpg, shape = drv)

  • Adding a geom
qplot(displ, hwy, data = mpg, geom = c("point", "smooth"))

qplot(displ, hwy, data = mpg, geom = c("point", "smooth"), facets = . ~ drv)

  • Histograms
qplot(hwy, data = mpg, fill = drv)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

  • Density Smooth
qplot(hwy, data = mpg, geom = "density", color = drv)

  • Facets
qplot(displ, hwy, data = mpg, facets = . ~ drv)

qplot(hwy, data = mpg, facets = drv ~., binwidth = 2)

qplot(hwy, data = mpg, facets = .~ drv, binwidth = 2)


Building Up in Layers - ggplot()


Basic Plot

load data

load("maacs.Rda")
  • Basic Plot
library(ggplot2)
qplot(logpm25, NocturnalSympt, data = maacs, facets = . ~ bmicat, 
      geom = c("point", "smooth"))

  • Building Up in Layers
g <- ggplot(maacs, aes(logpm25, NocturnalSympt))
summary(g)
## data: id, eno, duBedMusM, pm25, mopos, logpm25, NocturnalSympt,
##   bmicat, logno2_new [750x9]
## mapping:  x = logpm25, y = NocturnalSympt
## faceting: facet_null()
  • Print Plot
g <- ggplot(maacs, aes(logpm25, NocturnalSympt))
print(g)

  • First Plot with Point Layer
g <- ggplot(maacs, aes(logpm25, NocturnalSympt))
g + geom_point()

  • Adding More Layers: Smooth

g + geom_point() + geom_smooth()

g + geom_point() + geom_smooth(method = "lm")

  • Adding More Layers: Facets
g + geom_point() + facet_grid(. ~ bmicat) + geom_smooth(method = "lm")


Modifying

  • Annotation
  • Labels: xlab(), ylab(), labs(), ggtitle()
  • Each of the geom functions has options to modify
  • For things that only make sense globally, use theme()
  • Example: theme(legend.position = "none")
  • Two standard appearance themes are included
  • theme_gray(): The default theme (gray background)
  • theme_bw(): More stark/plain

  • Modifying Aesthetics
g + geom_point(color = "steelblue", size = 4, alpha = 1/2)

g + geom_point(aes(color = bmicat), size = 4, alpha = 1/2)

  • Modifying Labels
g + geom_point(aes(color = bmicat)) + labs(title = "MAACS Cohort") + 
  labs(x = expression("log " * PM[2.5]), y = "Nocturnal Symptoms")

  • Customizing the Smooth
g + geom_point(aes(color = bmicat), size = 2, alpha = 1/2) + 
  geom_smooth(size = 4, linetype = 3, method = "lm", se = FALSE)

  • Changing the Theme
g + geom_point(aes(color = bmicat)) + theme_bw(base_family = "Times")

  • A Note about Axis Limits
testdat <- data.frame(x = 1:100, y = rnorm(100))
testdat[50,2] <- 100  ## Outlier!
plot(testdat$x, testdat$y, type = "l", ylim = c(-3,3))

g <- ggplot(testdat, aes(x = x, y = y))
g + geom_line()

  • Axis Limits
g + geom_line() + ylim(-3, 3)

g + geom_line() + coord_cartesian(ylim = c(-3, 3))


More Complex Example

  • How does the relationship between PM\(_{2.5}\) and nocturnal symptoms vary by BMI and NO\(_2\)?
  • Unlike our previous BMI variable, NO\(_2\) is continuous
  • We need to make NO\(_2\) categorical so we can condition on it in the plotting
  • Use the cut() function for this

  • Making NO\(_2\) Tertiles
## Calculate the tertiles of the data (four cutpoints: 0%, 33.3%, 66.7%, 100%)
cutpoints <- quantile(maacs$logno2_new, seq(0, 1, length.out = 4), na.rm = TRUE)
## Cut the data at the tertiles and create a new factor variable.
## include.lowest = TRUE keeps the smallest observation: it equals the first
## cutpoint, and cut()'s default left-open intervals would otherwise turn it into NA.
maacs$no2tert <- cut(maacs$logno2_new, cutpoints, include.lowest = TRUE)
## See the levels of the newly created factor variable
levels(maacs$no2tert)
## [1] "[-0.629,1.18]" "(1.18,1.44]"   "(1.44,2.48]"
  • Code for Final Plot
## Setup ggplot with data frame
g <- ggplot(maacs, aes(logpm25, NocturnalSympt))
## Add layers
g + geom_point(alpha = 1/3) + 
  facet_wrap(bmicat ~ no2tert, nrow = 2, ncol = 4) + 
  geom_smooth(method="lm", se=FALSE, col="steelblue") + 
  theme_bw(base_family = "Avenir", base_size = 10) + 
  labs(x = expression("log " * PM[2.5])) + 
  labs(y = "Nocturnal Symptoms") + 
  labs(title = "MAACS Cohort")

g


Hierarchical Clustering


Hierarchical clustering - example generation

set.seed(1234); par(mar=c(0,0,0,0))
x <- rnorm(12,mean=rep(1:3,each=4),sd=0.2)
y <- rnorm(12,mean=rep(c(1,2,1),each=4),sd=0.2)
plot(x,y,col="blue",pch=19,cex=2)
text(x+0.05,y+0.05,labels=as.character(1:12))


Hierarchical clustering - dist

  • Important parameters: x,method
dataFrame <- data.frame(x=x,y=y)
dist(dataFrame)
##             1          2          3          4          5          6
## 2  0.34120511                                                       
## 3  0.57493739 0.24102750                                            
## 4  0.26381786 0.52578819 0.71861759                                 
## 5  1.69424700 1.35818182 1.11952883 1.80666768                      
## 6  1.65812902 1.31960442 1.08338841 1.78081321 0.08150268           
## 7  1.49823399 1.16620981 0.92568723 1.60131659 0.21110433 0.21666557
## 8  1.99149025 1.69093111 1.45648906 2.02849490 0.61704200 0.69791931
## 9  2.13629539 1.83167669 1.67835968 2.35675598 1.18349654 1.11500116
## 10 2.06419586 1.76999236 1.63109790 2.29239480 1.23847877 1.16550201
## 11 2.14702468 1.85183204 1.71074417 2.37461984 1.28153948 1.21077373
## 12 2.05664233 1.74662555 1.58658782 2.27232243 1.07700974 1.00777231
##             7          8          9         10         11
## 2                                                        
## 3                                                        
## 4                                                        
## 5                                                        
## 6                                                        
## 7                                                        
## 8  0.65062566                                            
## 9  1.28582631 1.76460709                                 
## 10 1.32063059 1.83517785 0.14090406                      
## 11 1.37369662 1.86999431 0.11624471 0.08317570           
## 12 1.17740375 1.66223814 0.10848966 0.19128645 0.20802789

Hierarchical clustering - #1

suppressMessages(library(fields))
dataFrame <- data.frame(x=x,y=y)
rdistxy <- rdist(dataFrame)
diag(rdistxy) <- diag(rdistxy) + 1e5
# Find the index of the points with minimum distance
ind <- which(rdistxy == min(rdistxy),arr.ind=TRUE)
par(mfrow=c(1,2),mar=rep(0.2,4))
# Plot the points with the minimum overlayed
plot(x,y,col="blue",pch=19,cex=2)
text(x+0.05,y+0.05,labels=as.character(1:12))
points(x[ind[1,]],y[ind[1,]],col="orange",pch=19,cex=2)
# Make a cluster and cut it at the right height
distxy <- dist(dataFrame)
hcluster <- hclust(distxy)
dendro <- as.dendrogram(hcluster)
cutDendro <- cut(dendro,h=(hcluster$height[1]+0.00001) )
plot(cutDendro$lower[[11]],yaxt="n")


Hierarchical clustering - #2

library(fields)
dataFrame <- data.frame(x=x,y=y)
rdistxy <- rdist(dataFrame)
diag(rdistxy) <- diag(rdistxy) + 1e5
# Find the index of the points with minimum distance
ind <- which(rdistxy == min(rdistxy),arr.ind=TRUE)
par(mar=rep(0.2,4))
# Plot the points with the minimum overlayed
plot(x,y,col="blue",pch=19,cex=2)
text(x+0.05,y+0.05,labels=as.character(1:12))
points(x[ind[1,]],y[ind[1,]],col="orange",pch=19,cex=2)
points(mean(x[ind[1,]]),mean(y[ind[1,]]),col="black",cex=3,lwd=3,pch=3)
points(mean(x[ind[1,]]),mean(y[ind[1,]]),col="orange",cex=5,lwd=3,pch=1)


Hierarchical clustering - #3

library(fields)
dataFrame <- data.frame(x=x,y=y)
rdistxy <- rdist(dataFrame)
# Inflate the diagonal (self-distances) so a point is never matched with itself
diag(rdistxy) <- diag(rdistxy) + 1e5
# Find the index of the points at the 3rd-smallest entry of the distance matrix
# (presumably the second-closest pair, since each pairwise distance appears twice
# in a symmetric matrix -- confirm)
ind <- which(rdistxy == rdistxy[order(rdistxy)][3],arr.ind=TRUE)
par(mfrow=c(1,3),mar=rep(0.2,4))
# Plot the points, highlighting points 5 and 6 in orange and the pair found above in red
plot(x,y,col="blue",pch=19,cex=2)
text(x+0.05,y+0.05,labels=as.character(1:12))
points(x[c(5,6)],y[c(5,6)],col="orange",pch=19,cex=2)
points(x[ind[1,]],y[ind[1,]],col="red",pch=19,cex=2)
# Make dendrogram plots: cut the tree at the second entry of the merge heights
distxy <- dist(dataFrame)
hcluster <- hclust(distxy)
dendro <- as.dendrogram(hcluster)
cutDendro <- cut(dendro,h=(hcluster$height[2]) )
plot(cutDendro$lower[[10]],yaxt="n")
plot(cutDendro$lower[[5]],yaxt="n")


Hierarchical clustering - hclust

dataFrame <- data.frame(x=x,y=y)
distxy <- dist(dataFrame)
hClustering <- hclust(distxy)
plot(hClustering)


Prettier dendrograms

myplclust <- function(hclust, lab = hclust$labels,
                      lab.col = rep(1, length(hclust$labels)), hang = 0.1, ...) {
  ## Modification of plclust for plotting hclust objects *in colour*!
  ## Copyright Eva KF Chan 2009
  ##
  ## Arguments:
  ##   hclust:  hclust object
  ##   lab:     a character vector of labels of the leaves of the tree
  ##   lab.col: colour for the labels; NA=default device foreground colour
  ##   hang:    as in hclust & plclust
  ##   ...:     forwarded to both plot() and text()
  ## Side effect:
  ##   A display of hierarchical cluster with coloured leaf labels.

  ## Negative entries of hclust$merge are treated as leaves; pair each one with
  ## the height of the merge step (row) in which it appears.
  merge_entries <- as.numeric(hclust$merge)
  merge_heights <- rep(hclust$height, 2)
  is_leaf <- which(merge_entries < 0)
  leaf_id <- abs(merge_entries[is_leaf])
  leaf_height <- merge_heights[is_leaf]

  ## Sort by leaf id so that leaf_height[i] belongs to leaf i
  by_id <- order(leaf_id)
  leaf_height <- leaf_height[by_id]
  leaf_id <- leaf_id[by_id]

  ## Draw the tree without labels, then write the coloured labels below each leaf
  plot(hclust, labels = FALSE, hang = hang, ...)
  text(x = leaf_id, y = leaf_height[hclust$order] - (max(hclust$height) * hang),
       labels = lab[hclust$order], col = lab.col[hclust$order],
       srt = 90, adj = c(1, 0.5), xpd = NA, ...)
}
dataFrame <- data.frame(x=x,y=y)
distxy <- dist(dataFrame)
hClustering <- hclust(distxy)
myplclust(hClustering,lab=rep(1:3,each=4),lab.col=rep(1:3,each=4))


Merging Points

  • Merging points - complete
dataFrame <- data.frame(x=x,y=y)
par(mar=rep(0.1,4))
plot(x,y,col="blue",pch=19,cex=2)
points(x[8],y[8],col="orange",pch=3,lwd=3,cex=3)
points(x[1],y[1],col="orange",pch=3,lwd=3,cex=3)
segments(x[8],y[8],x[1],y[1],lwd=3,col="orange")

  • Merging points - average
dataFrame <- data.frame(x=x,y=y)
par(mar=rep(0.1,4))
plot(x,y,col="blue",pch=19,cex=2)
points(mean(x[1:4]),mean(y[1:4]),col="orange",pch=3,lwd=3,cex=3)
points(mean(x[5:8]),mean(y[5:8]),col="orange",pch=3,lwd=3,cex=3)
segments(mean(x[1:4]),mean(y[1:4]),mean(x[5:8]),mean(y[5:8]),lwd=3,col="orange")


heatmap()

dataFrame <- data.frame(x=x,y=y)
set.seed(143)
dataMatrix <- as.matrix(dataFrame)[sample(1:12),]
heatmap(dataMatrix)


K-means Clustering


K-means clustering - starting centroids

par(mar=rep(0.2,4))
plot(x,y,col="blue",pch=19,cex=2)
text(x+0.05,y+0.05,labels=as.character(1:12))
cx <- c(1,1.8,2.5)
cy <- c(2,1,1.5)
points(cx,cy,col=c("red","orange","purple"),pch=3,cex=2,lwd=2)


K-means clustering - assign to closest centroid

## Draw the points and the three starting centroids
par(mar = rep(0.2, 4))
plot(x, y, col = "blue", pch = 19, cex = 2)
cols1 <- c("red", "orange", "purple")
text(x + 0.05, y + 0.05, labels = as.character(1:12))
cx <- c(1, 1.8, 2.5)
cy <- c(2, 1, 1.5)
points(cx, cy, col = cols1, pch = 3, cex = 2, lwd = 2)
## Squared Euclidean distance from every point to each centroid (one row per centroid)
distTmp <- rbind(
  (x - cx[1])^2 + (y - cy[1])^2,
  (x - cx[2])^2 + (y - cy[2])^2,
  (x - cx[3])^2 + (y - cy[3])^2
)
## Each point takes the index of its nearest centroid and is recoloured to match
newClust <- apply(distTmp, 2, which.min)
points(x, y, pch = 19, cex = 2, col = cols1[newClust])


K-means clustering - recalculate centroids

par(mar=rep(0.2,4))
plot(x,y,col="blue",pch=19,cex=2)
cols1 <- c("red","orange","purple")
text(x+0.05,y+0.05,labels=as.character(1:12))
## Old (starting) centroids -- defined before they are used below, so this
## snippet no longer depends on cx/cy left over from a previous snippet
cx <- c(1,1.8,2.5)
cy <- c(2,1,1.5)
## Find the closest centroid (squared Euclidean distance, one row per centroid)
distTmp <- matrix(NA,nrow=3,ncol=12)
distTmp[1,] <- (x-cx[1])^2 + (y-cy[1])^2
distTmp[2,] <- (x-cx[2])^2 + (y-cy[2])^2
distTmp[3,] <- (x-cx[3])^2 + (y-cy[3])^2
newClust <- apply(distTmp,2,which.min)
points(x,y,pch=19,cex=2,col=cols1[newClust])
## New centroids: the mean x and y of the points assigned to each cluster
newCx <- tapply(x,newClust,mean)
newCy <- tapply(y,newClust,mean)
points(newCx,newCy,col=cols1,pch=3,cex=2,lwd=2)


K-means clustering - reassign values

par(mar=rep(0.2,4))
plot(x,y,col="blue",pch=19,cex=2)
cols1 <- c("red","orange","purple")
text(x+0.05,y+0.05,labels=as.character(1:12))
## Starting centroids
cx <- c(1,1.8,2.5)
cy <- c(2,1,1.5)
## Find the closest centroid
distTmp <- matrix(NA,nrow=3,ncol=12)
distTmp[1,] <- (x-cx[1])^2 + (y-cy[1])^2
distTmp[2,] <- (x-cx[2])^2 + (y-cy[2])^2
distTmp[3,] <- (x-cx[3])^2 + (y-cy[3])^2
newClust <- apply(distTmp,2,which.min)
## Recalculated centroids: per-cluster means of x and y
newCx <- tapply(x,newClust,mean)
newCy <- tapply(y,newClust,mean)
## Plot the recalculated centroids (not the starting ones)
points(newCx,newCy,col=cols1,pch=3,cex=2,lwd=2)
## Iteration 2: reassign every point to the nearest recalculated centroid
distTmp <- matrix(NA,nrow=3,ncol=12)
distTmp[1,] <- (x-newCx[1])^2 + (y-newCy[1])^2
distTmp[2,] <- (x-newCx[2])^2 + (y-newCy[2])^2
distTmp[3,] <- (x-newCx[3])^2 + (y-newCy[3])^2
newClust2 <- apply(distTmp,2,which.min)
points(x,y,pch=19,cex=2,col=cols1[newClust2])


K-means clustering - update centroids

par(mar=rep(0.2,4))
plot(x,y,col="blue",pch=19,cex=2)
cols1 <- c("red","orange","purple")
text(x+0.05,y+0.05,labels=as.character(1:12))
cx <- c(1,1.8,2.5)
cy <- c(2,1,1.5)
## Find the closest centroid
distTmp <- matrix(NA,nrow=3,ncol=12)
distTmp[1,] <- (x-cx[1])^2 + (y-cy[1])^2
distTmp[2,] <- (x-cx[2])^2 + (y-cy[2])^2
distTmp[3,] <- (x-cx[3])^2 + (y-cy[3])^2
newClust <- apply(distTmp,2,which.min)
newCx <- tapply(x,newClust,mean)
newCy <- tapply(y,newClust,mean)
## Iteration 2
distTmp <- matrix(NA,nrow=3,ncol=12)
distTmp[1,] <- (x-newCx[1])^2 + (y-newCy[1])^2
distTmp[2,] <- (x-newCx[2])^2 + (y-newCy[2])^2
distTmp[3,] <- (x-newCx[3])^2 + (y-newCy[3])^2
finalClust <- apply(distTmp,2,which.min)
## Final centroids
finalCx <- tapply(x,finalClust,mean)
finalCy <- tapply(y,finalClust,mean)
points(finalCx,finalCy,col=cols1,pch=3,cex=2,lwd=2)
points(x,y,pch=19,cex=2,col=cols1[finalClust])


kmeans()

  • Important parameters: x, centers, iter.max, nstart
dataFrame <- data.frame(x,y)
kmeansObj <- kmeans(dataFrame,centers=3)
names(kmeansObj)
## [1] "cluster"      "centers"      "totss"        "withinss"    
## [5] "tot.withinss" "betweenss"    "size"         "iter"        
## [9] "ifault"
kmeansObj$cluster
##  [1] 2 2 2 2 1 1 1 1 3 3 3 3

kmeans()

par(mar=rep(0.2,4))
plot(x,y,col=kmeansObj$cluster,pch=19,cex=2)
points(kmeansObj$centers,col=1:3,pch=3,cex=3,lwd=3)


Heatmaps

set.seed(1234)
dataMatrix <- as.matrix(dataFrame)[sample(1:12),]
kmeansObj <- kmeans(dataMatrix,centers=3)
par(mfrow=c(1,2), mar = c(2, 4, 0.1, 0.1))
image(t(dataMatrix)[,nrow(dataMatrix):1],yaxt="n")
image(t(dataMatrix)[,order(kmeansObj$cluster)],yaxt="n")


Others

If you have a lot of points

x <- rnorm(1e5)
y <- rnorm(1e5)
plot(x,y,pch=19)

  • If you have a lot of points - sampling
x <- rnorm(1e5)
y <- rnorm(1e5)
sampledValues <- sample(1:1e5,size=1000,replace=FALSE)
plot(x[sampledValues],y[sampledValues],pch=19)

  • If you have a lot of points - smoothScatter
x <- rnorm(1e5)
y <- rnorm(1e5)
smoothScatter(x,y)

  • If you have a lot of points - hexbin {hexbin}
library(hexbin)
x <- rnorm(1e5)
y <- rnorm(1e5)
hbo <- hexbin(x,y)
plot(hbo)


Density plots

Important parameters (to plot): col,lwd,xlab,ylab,xlim,ylim

dens <- density(pData$AGEP)
plot(dens, lwd = 3, col = "blue")

  • Density plots - multiple distributions
dens <- density(pData$AGEP)
densMales <- density(pData$AGEP[which(pData$SEX == 1)])
plot(dens, lwd = 3, col = "blue")
lines(densMales, lwd = 3, col = "orange")


QQ-plots

  • Important parameters: x,y
## Compare the quantiles of two independent samples of 20 standard-normal draws
x <- rnorm(20)
y <- rnorm(20)
qqplot(x, y)
## Reference line with intercept 0 and slope 1
abline(a = 0, b = 1)


Matplot and spaghetti

  • Important parameters: x, y, lty, lwd, pch, col
X <- matrix(rnorm(20*5),nrow=20)
matplot(X,type="b")


Heatmaps

  • Important parameters: x, y, z, col
image(1:10,161:236,as.matrix(pData[1:10,161:236]))

  • Heatmaps - matching intuition
newMatrix <- as.matrix(pData[1:10,161:236])
newMatrix <- t(newMatrix)[,nrow(newMatrix):1]
image(161:236, 1:10, newMatrix)


Maps - very basics

You may need to run install.packages("maps") if you don’t have the maps package installed.

library(maps)
map("world")
## Longitude spans -180..180 and goes on the x-axis; latitude spans -90..90 and
## goes on the y-axis. The two names were previously swapped; the values drawn
## and the resulting plot are unchanged.
lon <- runif(40, -180, 180); lat <- runif(40, -90, 90)
points(lon, lat, col = "blue", pch = 19)


Missing values and plots

x <- c(NA,NA,NA,4,5,6,7,8,9,10)
y <- 1:10
plot(x,y,pch=19,xlim=c(0,11),ylim=c(0,11))

  • Missing values and plots
## Make y missing wherever x is negative, then compare the distribution of x
## between rows where y is missing and rows where it is not
x <- rnorm(100)
y <- rnorm(100)
is.na(y) <- x < 0
boxplot(x ~ is.na(y))


xyplot

library(datasets)
library(lattice)
## Combine the state statistics with each state's region so that region can be
## used as the conditioning variable
state <- data.frame(state.x77, region = state.region)
## One panel per region, laid out as 4 columns by 1 row
xyplot(Life.Exp ~ Income | region, data = state, layout = c(4,1))


Statistical Inference


Expected values

  • The expected value or mean of a random variable is the center of its distribution
  • For discrete random variable \(X\) with PMF \(p(x)\), it is defined as follows \[ E[X] = \sum_x xp(x). \] where the sum is taken over the possible values of \(x\)
  • \(E[X]\) represents the center of mass of a collection of locations and weights, \(\{x, p(x)\}\)

Find the center of mass of the bars

library(MASS);library(HistData);library(Hmisc);library(grid);library(lattice);library(survival);
library(Formula);library(ggplot2);library(UsingR)
data(galton)
par(mfrow=c(1,2))
hist(galton$child,col="blue",breaks=100)
hist(galton$parent,col="blue",breaks=100)

  • Using manipulate
library(manipulate)
## Draw the child-height histogram with a vertical red line at mu, and print mu
## and the mean squared distance of the data from mu (labelled "Imbalance")
myHist <- function(mu) {
  hist(galton$child, col = "blue", breaks = 100)
  lines(c(mu, mu), c(0, 150), col = "red", lwd = 5)
  imbalance <- mean((galton$child - mu)^2)
  text(63, 150, paste("mu = ", mu))
  text(63, 140, paste("Imbalance = ", round(imbalance, 2)))
}
## Interactive slider for mu from 62 to 74 in steps of 0.5
manipulate(myHist(mu), mu = slider(62, 74, step = 0.5))
  • The center of mass is the empirical mean
  ## Histogram of child heights with a vertical red line at their sample mean
  hist(galton$child, col = "blue", breaks = 100)
  meanChild <- mean(galton$child)
  lines(rep(meanChild, 100), seq(0, 150, length.out = 100), col = "red", lwd = 5)


Common Distributions

  • Different signal meaning

  • d:Probability Density Function - PDF
  • p:Cumulative Distribution Function - CDF
  • r:Generate random sample
  • q: Quantile function (inverse CDF). e.g. qbinom(0.7, 8, 0.5) returns 5, the smallest x whose cumulative probability P(X ≤ x) is at least 0.7

  • Different distribution

  • binom; norm; pois; exp; t


iid Bernoulli trials Bernoulli distribution

  • If several iid Bernoulli observations, say \(x_1,\ldots, x_n\), are observed the likelihood is \[ \prod_{i=1}^n p^{x_i} (1 - p)^{1 - x_i} = p^{\sum x_i} (1 - p)^{n - \sum x_i} \]
  • Notice that the likelihood depends only on the sum of the \(x_i\)
  • Because \(n\) is fixed and assumed known, this implies that the sample proportion \(\sum_i x_i / n\) contains all of the relevant information about \(p\)
  • We can maximize the Bernoulli likelihood over \(p\) to obtain that \(\hat p = \sum_i x_i / n\) is the maximum likelihood estimator for \(p\)

  • Plotting all possible likelihoods for a small n

## Plot the Bernoulli likelihood, scaled by its value at phat = x / n so that each
## curve peaks at 1, for every possible number of successes x = 0, ..., n.
n <- 5
pvals <- seq(0, 1, length.out = 1000)
plot(c(0, 1), c(0, 1.2), type = "n", frame = FALSE, xlab = "p", ylab = "likelihood")
## Label each curve with its x, placed at p = x / n just above the peak
text((0 : n) / n, 1.1, as.character(0 : n))
## Only the drawing side effect is wanted, so the result is wrapped in invisible():
## a bare sapply() at top level would also print a list of NULLs. An anonymous
## function (rather than a for loop) keeps x and phat local.
invisible(lapply(0 : n, function(x) {
  phat <- x / n
  if (x == 0) {
    ## phat is 0 here, so the pvals / phat factor is left out
    lines(pvals, ((1 - pvals) / (1 - phat))^(n - x), lwd = 3)
  } else if (x == n) {
    ## phat is 1 here, so the (1 - pvals) / (1 - phat) factor is left out
    lines(pvals, (pvals / phat)^x, lwd = 3)
  } else {
    lines(pvals, (pvals / phat)^x * ((1 - pvals) / (1 - phat))^(n - x), lwd = 3)
  }
}))
title(paste("Likelihoods for n = ", n))


\(n\) choose \(x\), binomial distribution

  • Recall that the notation \[\left( \begin{array}{c} n \\ x \end{array} \right) = \frac{n!}{x!(n-x)!} \] (read “\(n\) choose \(x\)”) counts the number of ways of selecting \(x\) items out of \(n\) without replacement disregarding the order of the items

\[\left( \begin{array}{c} n \\ 0 \end{array} \right) = \left( \begin{array}{c} n \\ n \end{array} \right) = 1 \]


  • Example

  • Suppose a friend has \(8\) children (oh my!), \(7\) of which are girls and none are twins
  • If each gender has an independent \(50\)% probability for each birth, what’s the probability of getting \(7\) or more girls out of \(8\) births?

## P(7 or more girls out of 8 births), computed directly from the binomial formula
(choose(8, 7) + choose(8, 8)) * 0.5^8
## [1] 0.03515625
## Same probability as the upper tail P(X > 6)
pbinom(6, size = 8, prob = 0.5, lower.tail = FALSE)
## [1] 0.03515625
## The complementary lower tail P(X <= 6)
pbinom(6, size = 8, prob = 0.5, lower.tail = TRUE)
## [1] 0.9648438
## Same probability again as the sum of the point masses at 7 and 8
dbinom(7, size = 8, prob = 0.5) + dbinom(8, size = 8, prob = 0.5)
## [1] 0.03515625

  • Verify the likelihood
plot(pvals, dbinom(7, 8, pvals) / dbinom(7, 8, 7/8) , 
     lwd = 3, frame = FALSE, type = "l", xlab = "p", ylab = "likelihood")
abline(v = 7/8, lwd = 3, col = "red")


The normal distribution

  • A random variable is said to follow a normal or Gaussian distribution with mean \(\mu\) and variance \(\sigma^2\) if the associated density is \[ (2\pi \sigma^2)^{-1/2}e^{-(x - \mu)^2/(2\sigma^2)} \] If \(X\) is a RV with this density then \(E[X] = \mu\) and \(Var(X) = \sigma^2\)
  • We write \(X\sim \mbox{N}(\mu, \sigma^2)\)
  • When \(\mu = 0\) and \(\sigma = 1\) the resulting distribution is called the standard normal distribution
  • The standard normal density function is labeled \(\phi\)
  • Standard normal RVs are often labeled \(Z\)

## Standard normal density on [-3, 3] with a vertical line at each whole number
zvals <- seq(-3, 3, length.out = 1000)
plot(zvals, dnorm(zvals),
     type = "l", lwd = 3, frame = FALSE, xlab = "z", ylab = "Density")
## abline() accepts a vector for v, so one call draws all seven lines; the
## previous sapply() wrapper also printed a list of NULLs at top level
abline(v = -3 : 3)


  • Facts about the normal density

  • If \(X \sim \mbox{N}(\mu,\sigma^2)\) then \(Z = \frac{X -\mu}{\sigma}\) is standard normal
  • If \(Z\) is standard normal \[X = \mu + \sigma Z \sim \mbox{N}(\mu, \sigma^2)\]
  • The non-standard normal density is \[\phi\{(x - \mu) / \sigma\}/\sigma\]


  • More facts about the normal density
  1. Approximately \(68\%\), \(95\%\) and \(99\%\) of the normal density lies within \(1\), \(2\) and \(3\) standard deviations from the mean, respectively
  2. \(-1.28\), \(-1.645\), \(-1.96\) and \(-2.33\) are the \(10^{th}\), \(5^{th}\), \(2.5^{th}\) and \(1^{st}\) percentiles of the standard normal distribution respectively
  3. By symmetry, \(1.28\), \(1.645\), \(1.96\) and \(2.33\) are the \(90^{th}\), \(95^{th}\), \(97.5^{th}\) and \(99^{th}\) percentiles of the standard normal distribution respectively

  • Question

  • What is the \(95^{th}\) percentile of a \(N(\mu, \sigma^2)\) distribution?
  • Quick answer in R qnorm(.95, mean = mu, sd = sd)
  • We want the point \(x_0\) so that \(P(X \leq x_0) = .95\) \[ \begin{eqnarray*} P(X \leq x_0) & = & P\left(\frac{X - \mu}{\sigma} \leq \frac{x_0 - \mu}{\sigma}\right) \\ \\ & = & P\left(Z \leq \frac{x_0 - \mu}{\sigma}\right) = .95 \end{eqnarray*} \]
  • Therefore \[\frac{x_0 - \mu}{\sigma} = 1.645\] or \(x_0 = \mu + \sigma 1.645\)
  • In general \(x_0 = \mu + \sigma z_0\) where \(z_0\) is the appropriate standard normal quantile


The Poisson distribution

  • Used to model counts
  • The Poisson mass function is \[ P(X = x; \lambda) = \frac{\lambda^x e^{-\lambda}}{x!} \] for \(x=0,1,\ldots\)
  • The mean of this distribution is \(\lambda\)
  • The variance of this distribution is \(\lambda\)
  • Notice that \(x\) ranges from \(0\) to \(\infty\)

Some uses for the Poisson distribution

  • Modeling event/time data
  • Modeling radioactive decay
  • Modeling survival data
  • Modeling unbounded count data
  • Modeling contingency tables
  • Approximating binomials when \(n\) is large and \(p\) is small

Poisson derivation

  • \(\lambda\) is the mean number of events per unit time
  • Let \(h\) be very small
  • Suppose we assume that
  • Prob. of an event in an interval of length \(h\) is \(\lambda h\) while the prob. of more than one event is negligible
  • Whether or not an event occurs in one small interval does not impact whether or not an event occurs in another small interval; then the number of events per unit time is Poisson with mean \(\lambda\)

Rates and Poisson random variables

  • Poisson random variables are used to model rates
  • \(X \sim Poisson(\lambda t)\) where
  • \(\lambda = E[X / t]\) is the expected count per unit of time
  • \(t\) is the total monitoring time

Poisson approximation to the binomial

  • When \(n\) is large and \(p\) is small the Poisson distribution is an accurate approximation to the binomial distribution
  • Notation
  • \(\lambda = n p\)
  • \(X \sim \mbox{Binomial}(n, p)\), \(\lambda = n p\) and
  • \(n\) gets large
  • \(p\) gets small
  • \(\lambda\) stays constant

Example

The number of people that show up at a bus stop is Poisson with a mean of \(2.5\) per hour.

If watching the bus stop for 4 hours, what is the probability that \(3\) or fewer people show up for the whole time?

# Over 4 hours at 2.5 arrivals per hour the count is Poisson(4 * 2.5);
# this is P(X <= 3).
ppois(q = 3, lambda = 4 * 2.5)
## [1] 0.01033605

Example, Poisson approximation to the binomial

We flip a coin with success probability \(0.01\) five hundred times. What’s the probability of 2 or fewer successes?

# Exact binomial probability of at most 2 successes in 500 trials, p = 0.01 ...
pbinom(q = 2, size = 500, prob = .01)
## [1] 0.1233858
# ... and the Poisson approximation with lambda = n * p.
ppois(q = 2, lambda = 500 * .01)
## [1] 0.124652

Asymptotics

  • Asymptotics is the term for the behavior of statistics as the sample size (or some other relevant quantity) limits to infinity (or some other relevant number)
  • (Asymptopia is my name for the land of asymptotics, where everything works out well and there’s no messes. The land of infinite data is nice that way.)
  • Asymptotics are incredibly useful for simple statistical inference and approximations
  • (Not covered in this class) Asymptotics often lead to nice understanding of procedures
  • Asymptotics generally give no assurances about finite sample performance
  • The kinds of asymptotics that do are orders of magnitude more difficult to work with
  • Asymptotics form the basis for frequency interpretation of probabilities (the long run proportion of times an event occurs)
  • To understand asymptotics, we need a very basic understanding of limits.

The Law of Large Numbers

  • Establishing that a random sequence converges to a limit is hard
  • Fortunately, we have a theorem that does all the work for us, called the Law of Large Numbers
  • The law of large numbers states that if \(X_1,\ldots X_n\) are iid from a population with mean \(\mu\) and variance \(\sigma^2\) then \(\bar X_n\) converges in probability to \(\mu\)
  • (There are many variations on the LLN; we are using a particularly lazy version, my favorite kind of version)

  • Law of large numbers in action
# Running mean of n standard normal draws against the sample size; the
# horizontal line at 0 is the population mean the running mean settles on.
n <- 10000
means <- cumsum(rnorm(n)) / seq_len(n)
plot(
  seq_len(n), means,
  type = "l", lwd = 2, frame = FALSE,
  ylab = "cumulative means", xlab = "sample size"
)
abline(h = 0)


The Central Limit Theorem

  • The Central Limit Theorem (CLT) is one of the most important theorems in statistics
  • For our purposes, the CLT states that the distribution of averages of iid variables, properly normalized, becomes that of a standard normal as the sample size increases
  • The CLT applies in an endless variety of settings
  • Let \(X_1,\ldots,X_n\) be a collection of iid random variables with mean \(\mu\) and variance \(\sigma^2\)
  • Let \(\bar X_n\) be their sample average
  • Then \(\frac{\bar X_n - \mu}{\sigma / \sqrt{n}}\) has a distribution like that of a standard normal for large \(n\).
  • Remember the form \[\frac{\bar X_n - \mu}{\sigma / \sqrt{n}} = \frac{\mbox{Estimate} - \mbox{Mean of estimate}}{\mbox{Std. Err. of estimate}}. \]
  • Usually, replacing the standard error by its estimated value doesn’t change the CLT

Simulation of mean of \(n\) dice

# Standardized means of n fair dice for n = 1, 2 and 6, one panel per n,
# each drawn over the standard normal density (grey) for comparison.
old_par <- par(mfrow = c(1, 3))
die_sd <- sqrt(35 / 12)  # exact SD of one fair die: variance is (6^2 - 1) / 12
z_grid <- seq(-3, 3, length.out = 100)
for (n in c(1, 2, 6)) {
  # 10000 simulated experiments of n rolls each, one experiment per row
  temp <- matrix(sample(1:6, n * 10000, replace = TRUE), ncol = n)
  temp <- rowMeans(temp)
  # (estimate - mean of estimate) / standard error of estimate
  temp <- (temp - 3.5) / (die_sd / sqrt(n))
  dty <- density(temp)
  plot(dty$x, dty$y, xlab = "", ylab = "density", type = "n",
       xlim = c(-3, 3), ylim = c(0, .5))
  title(paste("sample mean of", n, "obs"))
  lines(z_grid, dnorm(z_grid), col = grey(.8), lwd = 3)
  lines(dty$x, dty$y, lwd = 2)
}
# Put the previous panel layout back so later plots are not squeezed into a
# 1 x 3 grid.
par(old_par)


Coin CLT

  • Let \(X_i\) be the \(0\) or \(1\) result of the \(i^{th}\) flip of a possibly unfair coin
  • The sample proportion, say \(\hat p\), is the average of the coin flips
  • \(E[X_i] = p\) and \(Var(X_i) = p(1-p)\)
  • Standard error of the mean is \(\sqrt{p(1-p)/n}\)
  • Then \[ \frac{\hat p - p}{\sqrt{p(1-p)/n}} \] will be approximately normally distributed

# Standardized sample proportions of n coin flips for n = 1, 10 and 20.
# Top row: fair coin (p = 0.5). Bottom row: unfair coin (p = 0.1).
# Each panel overlays the standard normal density (grey) for comparison.
old_par <- par(mfrow = c(2, 3))
z_grid <- seq(-3, 3, length.out = 100)
for (p in c(.5, .1)) {
  for (n in c(1, 10, 20)) {
    # 10000 simulated experiments of n flips each, one experiment per row;
    # prob is given in the order of the outcomes 0, 1
    temp <- matrix(sample(0:1, n * 10000, replace = TRUE, prob = c(1 - p, p)),
                   ncol = n)
    temp <- rowMeans(temp)
    # (p_hat - p) / sqrt(p (1 - p) / n)
    temp <- (temp - p) / sqrt(p * (1 - p) / n)
    dty <- density(temp)
    plot(dty$x, dty$y, xlab = "", ylab = "density", type = "n",
         xlim = c(-3, 3), ylim = c(0, .5))
    title(paste("sample mean of", n, "obs"))
    lines(z_grid, dnorm(z_grid), col = grey(.8), lwd = 3)
    lines(dty$x, dty$y, lwd = 2)
  }
}
# Put the previous panel layout back so later plots are not drawn into the
# 2 x 3 grid.
par(old_par)


CLT in practice

  • In practice the CLT is mostly useful as an approximation \[ P\left( \frac{\bar X_n - \mu}{\sigma / \sqrt{n}} \leq z \right) \approx \Phi(z). \]
  • Recall \(1.96\) is a good approximation to the \(.975\) quantile (the \(97.5^{th}\) percentile) of the standard normal
  • Consider \[ \begin{eqnarray*} .95 & \approx & P\left( -1.96 \leq \frac{\bar X_n - \mu}{\sigma / \sqrt{n}} \leq 1.96 \right)\\ \\ & = & P\left(\bar X_n +1.96 \sigma/\sqrt{n} \geq \mu \geq \bar X_n - 1.96\sigma/\sqrt{n} \right),\\ \end{eqnarray*} \]

Confidence intervals

  • Therefore, according to the CLT, the probability that the random interval \[\bar X_n \pm z_{1-\alpha/2}\sigma / \sqrt{n}\] contains \(\mu\) is approximately 100\((1-\alpha)\)%, where \(z_{1-\alpha/2}\) is the \(1-\alpha/2\) quantile of the standard normal distribution
  • This is called a \(100(1 - \alpha)\)% confidence interval for \(\mu\)
  • We can replace the unknown \(\sigma\) with \(s\)

Give a confidence interval for the average height of sons

in Galton’s data

# 95% CLT-based interval for the mean of the sons' heights (sheight column of
# the UsingR father.son data). The final division by 12 rescales the
# endpoints -- presumably inches to feet; confirm against the data docs.
library(UsingR)
data(father.son)
x <- father.son$sheight
(mean(x) + qnorm(.975) * c(-1, 1) * sd(x) / sqrt(length(x))) / 12
## [1] 5.709670 5.737674

Sample proportions

  • In the event that each \(X_i\) is \(0\) or \(1\) with common success probability \(p\) then \(\sigma^2 = p(1 - p)\)
  • The interval takes the form \[ \hat p \pm z_{1 - \alpha/2} \sqrt{\frac{p(1 - p)}{n}} \]
  • Replacing \(p\) by \(\hat p\) in the standard error results in what is called a Wald confidence interval for \(p\)
  • Also note that \(p(1-p) \leq 1/4\) for \(0 \leq p \leq 1\)
  • Let \(\alpha = .05\) so that \(z_{1 -\alpha/2} = 1.96 \approx 2\) then \[ 2 \sqrt{\frac{p(1 - p)}{n}} \leq 2 \sqrt{\frac{1}{4n}} = \frac{1}{\sqrt{n}} \]
  • Therefore \(\hat p \pm \frac{1}{\sqrt{n}}\) is a quick CI estimate for \(p\)

  • Your campaign advisor told you that in a random sample of 100 likely voters, 56 intend to vote for you.
  • Can you relax? Do you have this race in the bag?
  • Without access to a computer or calculator, how precise is this estimate?
  • 1/sqrt(100)=.1 so a back of the envelope calculation gives an approximate 95% interval of (0.46, 0.66)
  • Not enough for you to relax, better go do more campaigning!
  • Rough guidelines, 100 for 1 decimal place, 10,000 for 2, 1,000,000 for 3.
# Quick margin of error 1 / sqrt(n) for n = 10, 100, ..., 1,000,000
round(1 / sqrt(10 ^ seq_len(6)), digits = 3)
## [1] 0.316 0.100 0.032 0.010 0.003 0.001

Poisson interval

  • A nuclear pump failed 5 times out of 94.32 days, give a 95% confidence interval for the failure rate per day?
  • \(X \sim Poisson(\lambda t)\).
  • Estimate \(\hat \lambda = X/t\)
  • \(Var(\hat \lambda) = \lambda / t\) \[ \frac{\hat \lambda - \lambda}{\sqrt{\hat \lambda / t}} = \frac{X - t \lambda}{\sqrt{X}} \rightarrow N(0,1) \]
  • This isn’t the best interval.
  • There are better asymptotic intervals.
  • You can get an exact CI in this case.

R code

# Observed failure count, monitoring time in days, and estimated rate per day
x <- 5; t <- 94.32; lambda <- x / t
# Wald interval: estimate +/- normal quantile * sqrt(lambda_hat / t)
round(lambda + c(-1, 1) * qnorm(.975) * sqrt(lambda / t), 3)
## [1] 0.007 0.099
# Exact interval. Pass the monitoring time through `t` rather than retyping
# it, and spell out `conf.int` instead of relying on partial matching.
poisson.test(x, T = t)$conf.int
## [1] 0.01721254 0.12371005
## attr(,"conf.level")
## [1] 0.95

In the regression class

# Intercept-only Poisson regression of the count x with log(t) as an offset;
# exponentiating moves the profiled interval for the intercept from the log
# scale back to the rate scale. Uses x and t from the preceding snippet.
exp(confint(glm(x ~ offset(log(t)), family = poisson())))
## Waiting for profiling to be done...
##      2.5 %     97.5 % 
## 0.01900677 0.11393446

T Confidence Intervals

  • In the previous, we discussed creating a confidence interval using the CLT
  • In this lecture, we discuss some methods for small samples, notably Gosset’s \(t\) distribution
  • To discuss the \(t\) distribution we must discuss the Chi-squared distribution
  • Throughout we use the following general procedure for creating CIs
  1. Create a pivot, a statistic whose distribution does not depend on the parameter of interest

  2. Solve the probability that the pivot lies between bounds for the parameter


The Chi-squared distribution

  • Suppose that \(S^2\) is the sample variance from a collection of iid \(N(\mu,\sigma^2)\) data; then \[ \frac{(n - 1) S^2}{\sigma^2} \sim \chi^2_{n-1} \] which reads: follows a Chi-squared distribution with \(n-1\) degrees of freedom
  • The Chi-squared distribution is skewed and has support on \(0\) to \(\infty\)
  • The mean of the Chi-squared is its degrees of freedom
  • The variance of the Chi-squared distribution is twice the degrees of freedom

Confidence interval for the variance

Note that if \(\chi^2_{n-1, \alpha}\) is the \(\alpha\) quantile of the Chi-squared distribution then

\[ \begin{eqnarray*} 1 - \alpha & = & P \left( \chi^2_{n-1, \alpha/2} \leq \frac{(n - 1) S^2}{\sigma^2} \leq \chi^2_{n-1,1 - \alpha/2} \right) \\ \\ & = & P\left(\frac{(n-1)S^2}{\chi^2_{n-1,1-\alpha/2}} \leq \sigma^2 \leq \frac{(n-1)S^2}{\chi^2_{n-1,\alpha/2}} \right) \\ \end{eqnarray*} \] So that \[ \left[\frac{(n-1)S^2}{\chi^2_{n-1,1-\alpha/2}}, \frac{(n-1)S^2}{\chi^2_{n-1,\alpha/2}}\right] \] is a \(100(1-\alpha)\%\) confidence interval for \(\sigma^2\)


Notes about this interval

  • This interval relies heavily on the assumed normality
  • Square-rooting the endpoints yields a CI for \(\sigma\)

Example

  • Confidence interval for the standard deviation of sons’ heights from Galton’s data
library(UsingR)
data(father.son)
x <- father.son$sheight
n <- length(x)
s <- sd(x)
# Endpoints are sqrt((n - 1) s^2 / chi-squared quantile). The upper quantile
# goes first so that the smaller endpoint is printed first.
round(sqrt((n - 1) * s ^ 2 / qchisq(c(.975, .025), df = n - 1)), digits = 3)
## [1] 2.701 2.939

Gosset’s \(t\) distribution

  • Invented by William Gosset (under the pseudonym “Student”) in 1908
  • Has thicker tails than the normal
  • Is indexed by a degrees of freedom; gets more like a standard normal as df gets larger
  • Is obtained as \[ \frac{Z}{\sqrt{\frac{\chi^2}{df}}} \] where \(Z\) and \(\chi^2\) are independent standard normals and Chi-squared distributions respectively

Result

  • Suppose that \((X_1,\ldots,X_n)\) are iid \(N(\mu,\sigma^2)\), then:
  1. \(\frac{\bar X - \mu}{\sigma / \sqrt{n}}\) is standard normal
  2. \(\sqrt{\frac{(n - 1) S^2}{\sigma^2 (n - 1)}} = S / \sigma\) is the square root of a Chi-squared divided by its df
  • Therefore \[ \frac{\frac{\bar X - \mu}{\sigma /\sqrt{n}}}{S/\sigma} = \frac{\bar X - \mu}{S/\sqrt{n}} \] follows Gosset’s \(t\) distribution with \(n-1\) degrees of freedom

Confidence intervals for the mean

  • Notice that the \(t\) statistic is a pivot, therefore we use it to create a confidence interval for \(\mu\)
  • Let \(t_{df,\alpha}\) be the \(\alpha^{th}\) quantile of the t distribution with \(df\) degrees of freedom \[ \begin{eqnarray*} & & 1 - \alpha \\ & = & P\left(-t_{n-1,1-\alpha/2} \leq \frac{\bar X - \mu}{S/\sqrt{n}} \leq t_{n-1,1-\alpha/2}\right) \\ \\ & = & P\left(\bar X - t_{n-1,1-\alpha/2} S / \sqrt{n} \leq \mu \leq \bar X + t_{n-1,1-\alpha/2}S /\sqrt{n}\right) \end{eqnarray*} \]
  • Interval is \(\bar X \pm t_{n-1,1-\alpha/2} S/\sqrt{n}\)

Notes about the \(t\) interval

  • The \(t\) interval technically assumes that the data are iid normal, though it is robust to this assumption
  • It works well whenever the distribution of the data is roughly symmetric and mound shaped
  • Paired observations are often analyzed using the \(t\) interval by taking differences
  • For large degrees of freedom, \(t\) quantiles become the same as standard normal quantiles; therefore this interval converges to the same interval as the CLT yielded
  • For skewed distributions, the spirit of the \(t\) interval assumptions is violated
  • Also, for skewed distributions, it doesn’t make a lot of sense to center the interval at the mean
  • In this case, consider taking logs or using a different summary like the median
  • For highly discrete data, like binary, other intervals are available

Sleep data

In R typing data(sleep) brings up the sleep data originally analyzed in Gosset’s Biometrika paper, which shows the increase in hours for 10 patients on two soporific drugs. R treats the data as two groups rather than paired.


The data

data(sleep)
# Select each drug's measurements by the group label rather than by row
# position, so the split does not depend on how the rows happen to be ordered.
g1 <- sleep$extra[sleep$group == 1]; g2 <- sleep$extra[sleep$group == 2]
# Within-row differences (second group minus first)
difference <- g2 - g1
# n is taken from the data instead of being typed in
mn <- mean(difference); s <- sd(difference); n <- length(difference)
# t interval by hand: mean +/- t quantile * s / sqrt(n)
mn + c(-1, 1) * qt(.975, n-1) * s / sqrt(n)
## [1] 0.7001142 2.4598858
# The same interval from t.test()
t.test(difference)$conf.int
## [1] 0.7001142 2.4598858
## attr(,"conf.level")
## [1] 0.95

Likelihood

  • A common and fruitful approach to statistics is to assume that the data arises from a family of distributions indexed by a parameter that represents a useful summary of the distribution
  • The likelihood of a collection of data is the joint density evaluated as a function of the parameters with the data fixed
  • Likelihood analysis of data uses the likelihood to perform inference regarding the unknown parameter

Given a statistical probability mass function or density, say \(f(x, \theta)\), where \(\theta\) is an unknown parameter, the likelihood is \(f\) viewed as a function of \(\theta\) for a fixed, observed value of \(x\).


Interpretations of likelihoods

The likelihood has the following properties:

  1. Ratios of likelihood values measure the relative evidence of one value of the unknown parameter to another.
  2. Given a statistical model and observed data, all of the relevant information contained in the data regarding the unknown parameter is contained in the likelihood.
  3. If \(\{X_i\}\) are independent random variables, then their likelihoods multiply. That is, the likelihood of the parameters given all of the \(X_i\) is simply the product of the individual likelihoods.

Example

  • Suppose that we flip a coin with success probability \(\theta\)
  • Recall that the mass function for \(x\) \[ f(x,\theta) = \theta^x(1 - \theta)^{1 - x} ~~~\mbox{for}~~~ \theta \in [0,1]. \] where \(x\) is either \(0\) (Tails) or \(1\) (Heads)
  • Suppose that the result is a head
  • The likelihood is \[ {\cal L}(\theta, 1) = \theta^1 (1 - \theta)^{1 - 1} = \theta ~~~\mbox{for} ~~~ \theta \in [0,1]. \]
  • Therefore, \({\cal L}(.5, 1) / {\cal L}(.25, 1) = 2\),
  • There is twice as much evidence supporting the hypothesis that \(\theta = .5\) as there is supporting the hypothesis that \(\theta = .25\)

  • Suppose now that we flip our coin from the previous example 4 times and get the sequence 1, 0, 1, 1
  • The likelihood is: \[ \begin{eqnarray*} {\cal L}(\theta, 1,0,1,1) & = & \theta^1 (1 - \theta)^{1 - 1} \theta^0 (1 - \theta)^{1 - 0} \\ & \times & \theta^1 (1 - \theta)^{1 - 1} \theta^1 (1 - \theta)^{1 - 1}\\ & = & \theta^3(1 - \theta)^1 \end{eqnarray*} \]
  • This likelihood only depends on the total number of heads and the total number of tails; we might write \({\cal L}(\theta, 1, 3)\) for shorthand
  • Now consider \({\cal L}(.5, 1, 3) / {\cal L}(.25, 1, 3) = 5.33\)
  • There is over five times as much evidence supporting the hypothesis that \(\theta = .5\) over that \(\theta = .25\)


Plotting likelihoods

  • Generally, we want to consider all the values of \(\theta\) between 0 and 1
  • A likelihood plot displays \(\theta\) by \({\cal L}(\theta,x)\)
  • Because the likelihood measures relative evidence, dividing the curve by its maximum value (or any other value for that matter) does not change its interpretation

# Likelihood of p for 3 successes in 4 trials, divided by its value at
# p = 3/4 so that the curve peaks at 1.
pvals <- seq(0, 1, length.out = 1000)
plot(
  pvals, dbinom(3, 4, pvals) / dbinom(3, 4, 3/4),
  type = "l", frame = FALSE, lwd = 3,
  xlab = "p", ylab = "likelihood / max likelihood"
)


Maximum likelihood

  • The value of \(\theta\) where the curve reaches its maximum has a special meaning
  • It is the value of \(\theta\) that is most well supported by the data
  • This point is called the maximum likelihood estimate (or MLE) of \(\theta\) \[ MLE = \mathrm{argmax}_\theta {\cal L}(\theta, x). \]
  • Another interpretation of the MLE is that it is the value of \(\theta\) that would make the data that we observed most probable

Some results

  • If \(X_1, \ldots, X_n \stackrel{iid}{\sim} N(\mu, \sigma^2)\) then the MLE of \(\mu\) is \(\bar X\) and the MLE of \(\sigma^2\) is the biased sample variance estimate (dividing by \(n\) rather than \(n - 1\)).
  • If \(X_1,\ldots, X_n \stackrel{iid}{\sim} Bernoulli(p)\) then the MLE of \(p\) is \(\bar X\) (the sample proportion of 1s).
  • If \(X_i \stackrel{iid}{\sim} Binomial(n_i, p)\) then the MLE of \(p\) is \(\frac{\sum_{i=1}^n X_i}{\sum_{i=1}^n n_i}\) (the sample proportion of 1s).
  • If \(X \stackrel{iid}{\sim} Poisson(\lambda t)\) then the MLE of \(\lambda\) is \(X/t\).
  • If \(X_i \stackrel{iid}{\sim} Poisson(\lambda t_i)\) then the MLE of \(\lambda\) is \(\frac{\sum_{i=1}^n X_i}{\sum_{i=1}^n t_i}\)

Example

  • You saw 5 failure events per 94 days of monitoring a nuclear pump.
  • Assuming Poisson, plot the likelihood

# Relative Poisson likelihood of the daily rate lambda given 5 events in 94
# days; dividing by dpois(5, 5) scales it by the value at 94 * lambda = 5.
lambda <- seq(0, .2, length.out = 1000)
likelihood <- dpois(5, 94 * lambda) / dpois(5, 5)
plot(lambda, likelihood,
     frame = FALSE, lwd = 3, type = "l", xlab = expression(lambda))
# Vertical marker at lambda = 5/94
lines(c(5/94, 5/94), c(0, 1), col = "red", lwd = 3)
# Horizontal bars over the grid values whose relative likelihood exceeds
# 1/16 and 1/8 respectively
lines(range(lambda[likelihood > 1/16]), c(1/16, 1/16), lwd = 2)
lines(range(lambda[likelihood > 1/8]), c(1/8, 1/8), lwd = 2)


Bayesian inference

  • Bayesian statistics posits a prior on the parameter of interest
  • All inferences are then performed on the distribution of the parameter given the data, called the posterior
  • In general, \[ \mbox{Posterior} \propto \mbox{Likelihood} \times \mbox{Prior} \]
  • Therefore (as we saw in diagnostic testing) the likelihood is the factor by which our prior beliefs are updated to produce conclusions in the light of the data

Prior specification

  • The beta distribution is the default prior for parameters between \(0\) and \(1\).
  • The beta density depends on two parameters \(\alpha\) and \(\beta\) \[ \frac{\Gamma(\alpha + \beta)}{\Gamma(\alpha)\Gamma(\beta)} p ^ {\alpha - 1} (1 - p) ^ {\beta - 1} ~~~~\mbox{for} ~~ 0 \leq p \leq 1 \]
  • The mean of the beta density is \(\alpha / (\alpha + \beta)\)
  • The variance of the beta density is \[\frac{\alpha \beta}{(\alpha + \beta)^2 (\alpha + \beta + 1)}\]
  • The uniform density is the special case where \(\alpha = \beta = 1\)

## Exploring the beta density
# Interactive plot: the two sliders set the beta shape parameters and the
# density is redrawn over a grid of p values.
library(manipulate)
pvals <- seq(0.01, 0.99, length.out = 1000)
manipulate(
  plot(pvals, dbeta(pvals, alpha, beta), type = "l", lwd = 3, frame = FALSE),
  alpha = slider(min = 0.01, max = 10, initial = 1, step = .5),
  beta = slider(min = 0.01, max = 10, initial = 1, step = .5)
)

Posterior

  • Suppose that we chose values of \(\alpha\) and \(\beta\) so that the beta prior is indicative of our degree of belief regarding \(p\) in the absence of data
  • Then using the rule that \[ \mbox{Posterior} \propto \mbox{Likelihood} \times \mbox{Prior} \] and throwing out anything that doesn’t depend on \(p\), we have that \[ \begin{align} \mbox{Posterior} &\propto p^x(1 - p)^{n-x} \times p^{\alpha -1} (1 - p)^{\beta - 1} \\ & = p^{x + \alpha - 1} (1 - p)^{n - x + \beta - 1} \end{align} \]
  • This density is just another beta density with parameters \(\tilde \alpha = x + \alpha\) and \(\tilde \beta = n - x + \beta\)

Posterior mean

\[ \begin{align} E[p ~|~ X] & = \frac{\tilde \alpha}{\tilde \alpha + \tilde \beta}\\ \\ & = \frac{x + \alpha}{x + \alpha + n - x + \beta}\\ \\ & = \frac{x + \alpha}{n + \alpha + \beta} \\ \\ & = \frac{x}{n} \times \frac{n}{n + \alpha + \beta} + \frac{\alpha}{\alpha + \beta} \times \frac{\alpha + \beta}{n + \alpha + \beta} \\ \\ & = \mbox{MLE} \times \pi + \mbox{Prior Mean} \times (1 - \pi) \end{align} \]


Thoughts

  • The posterior mean is a mixture of the MLE (\(\hat p\)) and the prior mean
  • \(\pi\) goes to \(1\) as \(n\) gets large; for large \(n\) the data swamps the prior
  • For small \(n\), the prior mean dominates
  • Generalizes how science should ideally work; as data becomes increasingly available, prior beliefs should matter less and less
  • With a prior that is degenerate at a value, no amount of data can overcome the prior

Example

  • Suppose that in a random sample of an at-risk population \(13\) of \(20\) subjects had hypertension. Estimate the prevalence of hypertension in this population.
  • \(x = 13\) and \(n=20\)
  • Consider a uniform prior, \(\alpha = \beta = 1\)
  • The posterior is proportional to (see formula above) \[ p^{x + \alpha - 1} (1 - p)^{n - x + \beta - 1} = p^x (1 - p)^{n-x} \] That is, for the uniform prior, the posterior is the likelihood
  • Consider the instance where \(\alpha = \beta = 2\) (recall this prior is humped around the point \(.5\)) the posterior is \[ p^{x + \alpha - 1} (1 - p)^{n - x + \beta - 1} = p^{x + 1} (1 - p)^{n-x + 1} \]
  • The “Jeffrey’s prior” which has some theoretical benefits puts \(\alpha = \beta = .5\)

# Globals read by myPlot(): the grid of p values and the observed data
library(manipulate)
pvals <- seq(0.01, 0.99, length.out = 1000)
x <- 13  # subjects with hypertension
n <- 20  # subjects sampled
# Draw the beta prior, the binomial likelihood and the beta posterior for p on
# one set of axes, each rescaled to peak at 1.
#
# alpha, beta: shape parameters of the beta prior.
# Reads the globals pvals (grid of p values), x (successes) and n (trials).
myPlot <- function(alpha, beta) {
  prior <- dbeta(pvals, alpha, beta)
  # Posterior is beta with shapes alpha + x and beta + (n - x)
  posterior <- dbeta(pvals, alpha + x, beta + (n - x))

  # Empty unit square; the curves are added one by one
  plot(0 : 1, 0 : 1, type = "n", xlab = "p", ylab = "", frame = FALSE)
  lines(pvals, prior / max(prior), lwd = 3, col = "darkred")
  lines(pvals, dbinom(x, n, pvals) / dbinom(x, n, x / n),
        lwd = 3, col = "darkblue")
  lines(pvals, posterior / max(posterior), lwd = 3, col = "darkgreen")
  title("red=prior,green=posterior,blue=likelihood")
}
# Interactive version: the sliders supply the prior shape parameters to myPlot()
manipulate(
  myPlot(alpha, beta),
  alpha = slider(min = 0.01, max = 100, initial = 1, step = .5),
  beta = slider(min = 0.01, max = 100, initial = 1, step = .5)
)

Credible intervals

  • A Bayesian credible interval is the Bayesian analog of a confidence interval
  • A \(95\%\) credible interval, \([a, b]\) would satisfy \[ P(p \in [a, b] ~|~ x) = .95 \]
  • The best credible intervals chop off the posterior with a horizontal line in the same way we did for likelihoods
  • These are called highest posterior density (HPD) intervals

Getting HPD intervals for this example

  • Install the package, then the command
# Highest posterior density interval for 13 successes in 20 trials
library(binom)
binom.bayes(x = 13, n = 20, type = "highest")
##   method  x  n shape1 shape2      mean     lower     upper        sig
## 1  bayes 13 20   13.5    7.5 0.6428571 0.4423068 0.8360884 0.04999999

gives the HPD interval. The default credible level is \(95\%\) and the default prior is the Jeffreys prior.


# Globals read by myPlot2(): the grid of p values and the observed data
library(manipulate)
pvals <- seq(0.01, 0.99, length.out = 1000)
x <- 13  # observed successes
n <- 20  # number of trials
# Plot the beta posterior for p and outline the HPD credible interval on it.
#
# alpha, beta: shape parameters of the beta prior.
# cl:          credible level passed to binom.bayes() as conf.level.
# Reads the globals pvals (grid of p values), x (successes) and n (trials).
myPlot2 <- function(alpha, beta, cl) {
  post_shape1 <- alpha + x
  post_shape2 <- beta + (n - x)
  plot(pvals, dbeta(pvals, post_shape1, post_shape2),
       type = "l", lwd = 3, xlab = "p", ylab = "", frame = FALSE)

  hpd <- binom.bayes(x, n, type = "highest",
                     prior.shape1 = alpha,
                     prior.shape2 = beta,
                     conf.level = cl)
  bounds <- c(hpd$lower, hpd$upper)
  # Path: up from the axis at the lower bound, across between the two density
  # heights, and back down to the axis at the upper bound
  lines(rep(bounds, each = 2),
        c(0, dbeta(bounds, post_shape1, post_shape2), 0),
        type = "l", lwd = 3, col = "darkred")
}
# Interactive version: sliders for the prior shape parameters and the
# credible level
manipulate(
  myPlot2(alpha, beta, cl),
  alpha = slider(min = 0.01, max = 10, initial = 1, step = .5),
  beta = slider(min = 0.01, max = 10, initial = 1, step = .5),
  cl = slider(min = 0.01, max = 0.99, initial = 0.95, step = .01)
)

Two group intervals

Independent group \(t\) confidence intervals – Pooled T-test

  • Suppose that we want to compare the mean blood pressure between two groups in a randomized trial; those who received the treatment to those who received a placebo
  • We cannot use the paired t test because the groups are independent and may have different sample sizes
  • We now present methods for comparing independent groups

Notation

  • Let \(X_1,\ldots,X_{n_x}\) be iid \(N(\mu_x,\sigma^2)\)
  • Let \(Y_1,\ldots,Y_{n_y}\) be iid \(N(\mu_y, \sigma^2)\)
  • Let \(\bar X\), \(\bar Y\), \(S_x\), \(S_y\) be the means and standard deviations
  • Using the fact that linear combinations of normals are again normal, we know that \(\bar Y - \bar X\) is also normal with mean \(\mu_y - \mu_x\) and variance \(\sigma^2 (\frac{1}{n_x} + \frac{1}{n_y})\)
  • The pooled variance estimator \[S_p^2 = \{(n_x - 1) S_x^2 + (n_y - 1) S_y^2\}/(n_x + n_y - 2)\] is a good estimator of \(\sigma^2\)

Note

  • The pooled estimator is a mixture of the group variances, placing greater weight on whichever has a larger sample size
  • If the sample sizes are the same the pooled variance estimate is the average of the group variances
  • The pooled estimator is unbiased \[ \begin{eqnarray*} E[S_p^2] & = & \frac{(n_x - 1) E[S_x^2] + (n_y - 1) E[S_y^2]}{n_x + n_y - 2}\\ & = & \frac{(n_x - 1)\sigma^2 + (n_y - 1)\sigma^2}{n_x + n_y - 2} \end{eqnarray*} \]
  • The pooled variance estimate is independent of \(\bar Y - \bar X\) since \(S_x\) is independent of \(\bar X\) and \(S_y\) is independent of \(\bar Y\) and the groups are independent

Result

  • The sum of two independent Chi-squared random variables is Chi-squared with degrees of freedom equal to the sum of the degrees of freedom of the summands
  • Therefore \[ \begin{eqnarray*} (n_x + n_y - 2) S_p^2 / \sigma^2 & = & (n_x - 1)S_x^2 /\sigma^2 + (n_y - 1)S_y^2/\sigma^2 \\ \\ & = & \chi^2_{n_x - 1} + \chi^2_{n_y-1} \\ \\ & = & \chi^2_{n_x + n_y - 2} \end{eqnarray*} \]

Putting this all together

  • The statistic \[ \frac{\frac{\bar Y - \bar X - (\mu_y - \mu_x)}{\sigma \left(\frac{1}{n_x} + \frac{1}{n_y}\right)^{1/2}}}% {\sqrt{\frac{(n_x + n_y - 2) S_p^2}{(n_x + n_y - 2)\sigma^2}}} = \frac{\bar Y - \bar X - (\mu_y - \mu_x)}{S_p \left(\frac{1}{n_x} + \frac{1}{n_y}\right)^{1/2}} \] is a standard normal divided by the square root of an independent Chi-squared divided by its degrees of freedom
  • Therefore this statistic follows Gosset’s \(t\) distribution with \(n_x + n_y - 2\) degrees of freedom
  • Notice the form is (estimator - true value) / SE

Pooled T-test Confidence interval

  • Therefore a \((1 - \alpha)\times 100\%\) confidence interval for \(\mu_y - \mu_x\) is \[ \bar Y - \bar X \pm t_{n_x + n_y - 2, 1 - \alpha/2}S_p\left(\frac{1}{n_x} + \frac{1}{n_y}\right)^{1/2} \]
  • Remember this interval is assuming a constant variance across the two groups
  • If there is some doubt, assume a different variance per group, which we will discuss later

Example

  • Based on Rosner, Fundamentals of Biostatistics

  • Comparing SBP for 8 oral contraceptive users versus 21 controls
  • \(\bar X_{OC} = 132.86\) mmHg with \(s_{OC} = 15.34\) mmHg
  • \(\bar X_{C} = 127.44\) mmHg with \(s_{C} = 18.23\) mmHg
  • Pooled variance estimate

# Summary statistics for the two groups: oral contraceptive users (oc) and
# controls (c). Naming them keeps the sample sizes, the degrees of freedom
# and the standard error in agreement instead of retyping 7, 20, 27, 8, 21.
n_oc <- 8
mean_oc <- 132.86
sd_oc <- 15.34
n_c <- 21
mean_c <- 127.44
sd_c <- 18.23
pooled_df <- n_oc + n_c - 2
# Pooled SD: the group variances weighted by their degrees of freedom
sp <- sqrt(((n_oc - 1) * sd_oc^2 + (n_c - 1) * sd_c^2) / pooled_df)
# Difference in means +/- t quantile * pooled standard error
mean_oc - mean_c + c(-1, 1) * qt(.975, pooled_df) * sp * (1 / n_oc + 1 / n_c)^.5
## [1] -9.521097 20.361097

Pooled T-test and Paired T-test

data(sleep)
x1 <- sleep$extra[sleep$group == 1]
x2 <- sleep$extra[sleep$group == 2]
n1 <- length(x1)
n2 <- length(x2)
# Pooled SD: the group variances weighted by their degrees of freedom
sp <- sqrt(((n1 - 1) * var(x1) + (n2 - 1) * var(x2)) / (n1 + n2 - 2))
md <- mean(x1) - mean(x2)
semd <- sp * sqrt(1 / n1 + 1 / n2)
# Pooled interval by hand ...
md + c(-1, 1) * qt(.975, n1 + n2 - 2) * semd
## [1] -3.363874  0.203874
# ... and from t.test(), treating the groups as independent. `conf.int` is
# spelled out rather than relying on partial matching of `$conf`.
t.test(x1, x2, paired = FALSE, var.equal = TRUE)$conf.int
## [1] -3.363874  0.203874
## attr(,"conf.level")
## [1] 0.95
# Paired analysis of the same data
t.test(x1, x2, paired = TRUE)$conf.int
## [1] -2.4598858 -0.7001142
## attr(,"conf.level")
## [1] 0.95
# Equivalent one-sample interval on the differences
t.test(x1-x2)$conf.int
## [1] -2.4598858 -0.7001142
## attr(,"conf.level")
## [1] 0.95

Ignoring pairing


Unequal variances

  • Under unequal variances \[ \bar Y - \bar X \sim N\left(\mu_y - \mu_x, \frac{\sigma_x^2}{n_x} + \frac{\sigma_y^2}{n_y}\right) \]
  • The statistic \[ \frac{\bar Y - \bar X - (\mu_y - \mu_x)}{\left(\frac{S_x^2}{n_x} + \frac{S_y^2}{n_y}\right)^{1/2}} \] approximately follows Gosset’s \(t\) distribution with degrees of freedom equal to \[ \frac{\left(S_x^2 / n_x + S_y^2/n_y\right)^2} {\left(\frac{S_x^2}{n_x}\right)^2 / (n_x - 1) + \left(\frac{S_y^2}{n_y}\right)^2 / (n_y - 1)} \]

Example

  • Comparing SBP for 8 oral contraceptive users versus 21 controls
  • \(\bar X_{OC} = 132.86\) mmHg with \(s_{OC} = 15.34\) mmHg
  • \(\bar X_{C} = 127.44\) mmHg with \(s_{C} = 18.23\) mmHg
  • \(df=15.04\), \(t_{15.04, .975} = 2.13\)
  • Interval \[ 132.86 - 127.44 \pm 2.13 \left(\frac{15.34^2}{8} + \frac{18.23^2}{21} \right)^{1/2} = [-8.91, 19.75] \]
  • In R, t.test(..., var.equal = FALSE)

Comparing other kinds of data

  • For binomial data, there’s lots of ways to compare two groups
  • Relative risk, risk difference, odds ratio.
  • Chi-squared tests, normal approximations, exact tests.
  • For count data, there’s also Chi-squared tests and exact tests.
  • We’ll leave the discussions for comparing groups of data for binary and count data until covering glms in the regression class.
  • In addition, Mathematical Biostatistics Boot Camp 2 covers many special cases relevant to biostatistics.

Hypothesis testing

  • Hypothesis testing is concerned with making decisions using data
  • A null hypothesis is specified that represents the status quo, usually labeled \(H_0\)
  • The null hypothesis is assumed true and statistical evidence is required to reject it in favor of a research or alternative hypothesis

  • The alternative hypotheses are typically of the form \(<\), \(>\) or \(\neq\)
  • Note that there are four possible outcomes of our statistical decision process

Truth Decide Result
\(H_0\) \(H_0\) Correctly accept null
\(H_0\) \(H_a\) Type I error
\(H_a\) \(H_a\) Correctly reject null
\(H_a\) \(H_0\) Type II error

Discussion

  • Consider a court of law; the null hypothesis is that the defendant is innocent
  • We require evidence to reject the null hypothesis (convict)
  • If we require little evidence, then we would increase the percentage of innocent people convicted (type I errors); however we would also increase the percentage of guilty people convicted (correctly rejecting the null)
  • If we require a lot of evidence, then we increase the percentage of innocent people let free (correctly accepting the null) while we would also increase the percentage of guilty people let free (type II errors)

Example

  • Consider our example again
  • A reasonable strategy would reject the null hypothesis if \(\bar X\) was larger than some constant, say \(C\)
  • Typically, \(C\) is chosen so that the probability of a Type I error, \(\alpha\), is \(.05\) (or some other relevant constant)
  • \(\alpha\) = Type I error rate = Probability of rejecting the null hypothesis when, in fact, the null hypothesis is correct

\[ \begin{align} 0.05 & = P\left(\bar X \geq C ~|~ \mu = 30 \right) \\ & = P\left(\frac{\bar X - 30}{10 / \sqrt{100}} \geq \frac{C - 30}{10/\sqrt{100}} ~|~ \mu = 30\right) \\ & = P\left(Z \geq \frac{C - 30}{1}\right) \\ \end{align} \]

  • Hence \((C - 30) / 1 = 1.645\) implying \(C = 31.645\)
  • Since our mean is \(32\) we reject the null hypothesis

Discussion

  • In general we don’t convert \(C\) back to the original scale
  • We would just reject because the Z-score; which is how many standard errors the sample mean is above the hypothesized mean \[ \frac{32 - 30}{10 / \sqrt{100}} = 2 \] is greater than \(1.645\)
  • Or, whenever \(\sqrt{n} (\bar X - \mu_0) / s > Z_{1-\alpha}\)

General rules

  • The \(Z\) test for \(H_0:\mu = \mu_0\) versus
  • \(H_1: \mu < \mu_0\)
  • \(H_2: \mu \neq \mu_0\)
  • \(H_3: \mu > \mu_0\)
  • Test statistic \( TS = \frac{\bar X - \mu_0}{S / \sqrt{n}} \)
  • Reject the null hypothesis when
  • \(TS \leq -Z_{1 - \alpha}\)
  • \(|TS| \geq Z_{1 - \alpha / 2}\)
  • \(TS \geq Z_{1 - \alpha}\)

Notes

  • We have fixed \(\alpha\) to be low, so if we reject \(H_0\), either our model is wrong or there is a low probability that we have made an error
  • We have not fixed the probability of a type II error, \(\beta\); therefore we tend to say “Fail to reject \(H_0\)” rather than accepting \(H_0\)
  • Statistical significance is not the same as scientific significance
  • The region of TS values for which you reject \(H_0\) is called the rejection region

  • The \(Z\) test requires the assumptions of the CLT and for \(n\) to be large enough for it to apply
  • If \(n\) is small, then a Gosset’s \(T\) test is performed exactly in the same way, with the normal quantiles replaced by the appropriate Student’s \(T\) quantiles and \(n-1\) df
  • The probability of rejecting the null hypothesis when it is false is called power
  • Power is used a lot to calculate sample sizes for experiments


Example reconsidered

  • Consider our example again. Suppose that \(n= 16\) (rather than \(100\)). Then consider that \[ .05 = P\left(\frac{\bar X - 30}{s / \sqrt{16}} \geq t_{1-\alpha, 15} ~|~ \mu = 30 \right) \]
  • So that our test statistic is now \(\sqrt{16}(32 - 30) / 10 = 0.8\), while the critical value is \(t_{1-\alpha, 15} = 1.75\).
  • We now fail to reject.

Two sided tests

  • Suppose that we would reject the null hypothesis if in fact the mean was too large or too small
  • That is, we want to test the alternative \(H_a : \mu \neq 30\) (doesn’t make a lot of sense in our setting)
  • Then note \[ \alpha = P\left(\left. \left|\frac{\bar X - 30}{s /\sqrt{16}}\right| > t_{1-\alpha/2,15} ~\right|~ \mu = 30\right) \]
  • That is we will reject if the test statistic, \(0.8\), is either too large or too small, but the critical value is calculated using \(\alpha / 2\)
  • In our example the critical value is \(2.13\), so we fail to reject.

T test in R

# Load the UsingR package and its father.son height data
library(UsingR)
data(father.son)
# One-sample t test on the son-minus-father height differences
t.test(father.son$sheight - father.son$fheight)
## 
##  One Sample t-test
## 
## data:  father.son$sheight - father.son$fheight
## t = 11.789, df = 1077, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.8310296 1.1629160
## sample estimates:
## mean of x 
## 0.9969728

Connections with confidence intervals

  • Consider testing \(H_0: \mu = \mu_0\) versus \(H_a: \mu \neq \mu_0\)
  • Take the set of all possible values for which you fail to reject \(H_0\), this set is a \((1-\alpha)100\%\) confidence interval for \(\mu\)
  • The same works in reverse; if a \((1-\alpha)100\%\) interval contains \(\mu_0\), then we fail to reject \(H_0\)

Exact binomial test

  • Recall this problem, Suppose a friend has \(8\) children, \(7\) of which are girls and none are twins
  • Perform the relevant hypothesis test. \(H_0 : p = 0.5\) \(H_a : p > 0.5\)
  • What is the relevant rejection region so that the probability of rejecting is (less than) 5%?
Rejection region Type I error rate
[0 : 8] 1
[1 : 8] 0.9960938
[2 : 8] 0.9648438
[3 : 8] 0.8554688
[4 : 8] 0.6367187
[5 : 8] 0.3632813
[6 : 8] 0.1445313
[7 : 8] 0.0351563
[8 : 8] 0.0039062

Notes

  • It’s impossible to get an exact 5% level test for this case due to the discreteness of the binomial.
  • The closest is the rejection region [7 : 8]
  • Any alpha level lower than 0.0039062 is not attainable.
  • For larger sample sizes, we could do a normal approximation, but you already knew this.
  • Two sided test isn’t obvious.
  • Given a way to do two sided tests, we could take the set of values of \(p_0\) for which we fail to reject to get an exact binomial confidence interval (called the Clopper/Pearson interval, BTW)
  • For these problems, people always create a P-value (next lecture) rather than computing the rejection region.

P-values

  • Most common measure of “statistical significance”
  • Their ubiquity, along with concern over their interpretation and use makes them controversial among statisticians
  • http://warnercnr.colostate.edu/~anderson/thompson1.html
  • Also see Statistical Evidence: A Likelihood Paradigm by Richard Royall
  • Toward Evidence-Based Medical Statistics. 1: The P Value Fallacy by Steve Goodman
  • The hilariously titled: The Earth is Round (p < .05) by Cohen.
  • Some positive comments
  • simply statistics
  • normal deviate
  • Error statistics

What is a P-value?

Idea: Suppose nothing is going on - how unusual is it to see the estimate we got?

Approach:

  1. Define the hypothetical distribution of a data summary (statistic) when “nothing is going on” (null hypothesis)
  2. Calculate the summary/statistic with the data we have (test statistic)
  3. Compare what we calculated to our hypothetical distribution and see if the value is “extreme” (p-value)

P-values

  • The P-value is the probability under the null hypothesis of obtaining evidence as extreme or more extreme than that actually observed
  • If the P-value is small, then either \(H_0\) is true and we have observed a rare event or \(H_0\) is false
  • In our example the \(T\) statistic was \(0.8\).
  • What’s the probability of getting a \(T\) statistic as large as \(0.8\)?
# Upper-tail probability of a t statistic of at least 0.8 with 15 df
pt(q = 0.8, df = 15, lower.tail = FALSE)
## [1] 0.218099
  • Therefore, the probability of seeing evidence as extreme or more extreme than that actually obtained under \(H_0\) is 0.218099

The attained significance level

  • Our test statistic was \(2\) for \(H_0 : \mu = 30\) versus \(H_a:\mu > 30\).
  • Notice that we rejected the one sided test when \(\alpha = 0.05\), would we reject if \(\alpha = 0.01\), how about \(0.001\)?
  • The smallest value for alpha that you still reject the null hypothesis is called the attained significance level
  • This is equivalent, but philosophically a little different from, the P-value

Notes

  • By reporting a P-value the reader can perform the hypothesis test at whatever \(\alpha\) level he or she chooses
  • If the P-value is less than \(\alpha\) you reject the null hypothesis
  • For two sided hypothesis test, double the smaller of the two one sided hypothesis test P-values

Revisiting an earlier example

  • Suppose a friend has \(8\) children, \(7\) of which are girls and none are twins
  • If each gender has an independent \(50\)% probability for each birth, what’s the probability of getting \(7\) or more girls out of \(8\) births?
# P(7 or 8 girls in 8 births), summing the two binomial terms directly
sum(choose(8, 7:8) * 0.5^8)
## [1] 0.03515625
# The same upper tail from the binomial CDF: P(X > 6)
pbinom(q = 6, size = 8, prob = 0.5, lower.tail = FALSE)
## [1] 0.03515625

Poisson example

  • Suppose that a hospital has an infection rate of 10 infections per 100 person/days at risk (rate of 0.1) during the last monitoring period.
  • Assume that an infection rate of 0.05 is an important benchmark.
  • Given the model, could the observed rate being larger than 0.05 be attributed to chance?
  • Under \(H_0: \lambda = 0.05\) so that \(\lambda_0 100 = 5\)
  • Consider \(H_a: \lambda > 0.05\).
# P(X > 9), i.e. 10 or more infections, when the expected count is 5
ppois(q = 9, lambda = 5, lower.tail = FALSE)
## [1] 0.03182806

Power

  • Power is the probability of rejecting the null hypothesis when it is false
  • Ergo, power (as its name would suggest) is a good thing; you want more power
  • A type II error (a bad thing, as its name would suggest) is failing to reject the null hypothesis when it’s false; the probability of a type II error is usually called \(\beta\)
  • Note Power \(= 1 - \beta\)

Notes

  • Consider our previous example involving RDI
  • \(H_0: \mu = 30\) versus \(H_a: \mu > 30\)
  • Then power is \[P\left(\frac{\bar X - 30}{s /\sqrt{n}} > t_{1-\alpha,n-1} ~|~ \mu = \mu_a \right)\]
  • Note that this is a function that depends on the specific value of \(\mu_a\)!
  • Notice as \(\mu_a\) approaches \(30\) the power approaches \(\alpha\)

Calculating power for Gaussian data

Assume that \(n\) is large and that we know \(\sigma\) \[ \begin{align} 1 -\beta & = P\left(\frac{\bar X - 30}{\sigma /\sqrt{n}} > z_{1-\alpha} ~|~ \mu = \mu_a \right)\\ & = P\left(\frac{\bar X - \mu_a + \mu_a - 30}{\sigma /\sqrt{n}} > z_{1-\alpha} ~|~ \mu = \mu_a \right)\\ \\ & = P\left(\frac{\bar X - \mu_a}{\sigma /\sqrt{n}} > z_{1-\alpha} - \frac{\mu_a - 30}{\sigma /\sqrt{n}} ~|~ \mu = \mu_a \right)\\ \\ & = P\left(Z > z_{1-\alpha} - \frac{\mu_a - 30}{\sigma /\sqrt{n}} ~|~ \mu = \mu_a \right)\\ \\ \end{align} \]


Example continued

  • Suppose that we wanted to detect an increase in mean RDI of at least 2 events / hour (above 30).
  • Assume normality and that the sample in question will have a standard deviation of \(4\);
  • What would be the power if we took a sample size of \(16\)?
  • \(Z_{1-\alpha} = 1.645\)
  • \(\frac{\mu_a - 30}{\sigma /\sqrt{n}} = 2 / (4 /\sqrt{16}) = 2\)
  • \(P(Z > 1.645 - 2) = P(Z > -0.355) = 64\%\)
# Power: standard normal probability above the shifted cutoff 1.645 - 2
pnorm(q = -0.355, lower.tail = FALSE)
## [1] 0.6387052

Note

  • Consider \(H_0 : \mu = \mu_0\) and \(H_a : \mu > \mu_0\) with \(\mu = \mu_a\) under \(H_a\).
  • Under \(H_0\) the statistic \(Z = \frac{\sqrt{n}(\bar X - \mu_0)}{\sigma}\) is \(N(0, 1)\)
  • Under \(H_a\) \(Z\) is \(N\left( \frac{\sqrt{n}(\mu_a - \mu_0)}{\sigma}, 1\right)\)
  • We reject if \(Z > Z_{1-\alpha}\)

# Sketch the distribution of the Z statistic under H_0 and under H_a, and mark
# the one-sided rejection cutoff.
sigma <- 10
mu_0 <- 0
mu_a <- 2
n <- 100
alpha <- 0.05
# Empty canvas tall enough for the peak of the standard normal density
plot(c(-3, 6), c(0, dnorm(0)), type = "n", xlab = "Z value", ylab = "")
# `length.out` is spelled out instead of the partially matched `length`
xvals <- seq(-3, 6, length.out = 1000)
# Density of Z under H_0: N(0, 1)
lines(xvals, dnorm(xvals), type = "l", lwd = 3)
# Density of Z under H_a: N(sqrt(n) * (mu_a - mu_0) / sigma, 1)
lines(xvals, dnorm(xvals, mean = sqrt(n) * (mu_a - mu_0) / sigma), lwd = 3)
# H_0 is rejected for Z values to the right of this line
abline(v = qnorm(1 - alpha))


Question

  • When testing \(H_a : \mu > \mu_0\), notice if power is \(1 - \beta\), then \[1 - \beta = P\left(Z > z_{1-\alpha} - \frac{\mu_a - \mu_0}{\sigma /\sqrt{n}} ~|~ \mu = \mu_a \right) = P(Z > z_{\beta})\]
  • This yields the equation \[z_{1-\alpha} - \frac{\sqrt{n}(\mu_a - \mu_0)}{\sigma} = z_{\beta}\]
  • Unknowns: \(\mu_a\), \(\sigma\), \(n\), \(\beta\)
  • Knowns: \(\mu_0\), \(\alpha\)
  • Specify any 3 of the unknowns and you can solve for the remainder

Notes

  • The calculation for \(H_a:\mu < \mu_0\) is similar
  • For \(H_a: \mu \neq \mu_0\) calculate the one sided power using \(\alpha / 2\) (this is only approximately right, it excludes the probability of getting a large TS in the opposite direction of the truth)
  • Power goes up as \(\alpha\) gets larger
  • Power of a one sided test is greater than the power of the associated two sided test
  • Power goes up as \(\mu_a\) gets further away from \(\mu_0\)
  • Power goes up as \(n\) goes up
  • Power doesn’t need \(\mu_a\), \(\sigma\) and \(n\), instead only \(\frac{\sqrt{n}(\mu_a - \mu_0)}{\sigma}\)
  • The quantity \(\frac{\mu_a - \mu_0}{\sigma}\) is called the effect size, the difference in the means in standard deviation units.
  • Being unit free, it has some hope of interpretability across settings

T-test power

  • Consider calculating power for a Gosset’s \(T\) test for our example
  • The power is \[ P\left(\frac{\bar X - \mu_0}{S /\sqrt{n}} > t_{1-\alpha, n-1} ~|~ \mu = \mu_a \right) \]
  • Calculating this requires the non-central t distribution.
  • power.t.test does this very well
  • Omit one of the arguments and it solves for it

Example

# Power of a one-sided, one-sample t test with n = 16. All three calls have
# the same ratio delta / sd = 0.5 and report the same power.
# `alternative` is written in full rather than partially matched via `alt`.
power.t.test(n = 16, delta = 2 / 4, sd = 1, type = "one.sample",
             alternative = "one.sided")$power
## [1] 0.6040329
power.t.test(n = 16, delta = 2, sd = 4, type = "one.sample",
             alternative = "one.sided")$power
## [1] 0.6040329
power.t.test(n = 16, delta = 100, sd = 200, type = "one.sample",
             alternative = "one.sided")$power
## [1] 0.6040329

Example

# Leaving `n` out makes power.t.test() solve for the sample size that gives
# 80% power. All three calls have delta / sd = 0.5 and return the same n.
# `alternative` is written in full rather than partially matched via `alt`.
power.t.test(power = 0.8, delta = 2 / 4, sd = 1, type = "one.sample",
             alternative = "one.sided")$n
## [1] 26.13751
power.t.test(power = 0.8, delta = 2, sd = 4, type = "one.sample",
             alternative = "one.sided")$n
## [1] 26.13751
power.t.test(power = 0.8, delta = 100, sd = 200, type = "one.sample",
             alternative = "one.sided")$n
## [1] 26.13751

Multiple testing

Key ideas

  • Hypothesis testing/significance analysis is commonly overused
  • Correcting for multiple testing avoids false positives or discoveries
  • Two key components
  • Error measure
  • Correction

Three eras of statistics

The age of Quetelet and his successors, in which huge census-level data sets were brought to bear on simple but important questions: Are there more male than female births? Is the rate of insanity rising?

The classical period of Pearson, Fisher, Neyman, Hotelling, and their successors, intellectual giants who developed a theory of optimal inference capable of wringing every drop of information out of a scientific experiment. The questions dealt with still tended to be simple: Is treatment A better than treatment B?

The era of scientific mass production, in which new technologies typified by the microarray allow a single team of scientists to produce data sets of a size Quetelet would envy. But now the flood of data is accompanied by a deluge of questions, perhaps thousands of estimates or hypothesis tests that the statistician is charged with answering together; not at all what the classical masters had in mind. Which variables matter among the thousands measured? How do you relate unrelated information?

http://www-stat.stanford.edu/~ckirby/brad/papers/2010LSIexcerpt.pdf


Types of errors

Suppose you are testing a hypothesis that a parameter \(\beta\) equals zero versus the alternative that it does not equal zero. These are the possible outcomes.

Type I error or false positive (\(V\)) Say that the parameter does not equal zero when it does

Type II error or false negative (\(T\)) Say that the parameter equals zero when it doesn’t


Error rates

False positive rate - The rate at which false results (\(\beta = 0\)) are called significant: \(E\left[\frac{V}{m_0}\right]\)*

Family wise error rate (FWER) - The probability of at least one false positive \({\rm Pr}(V \geq 1)\)

False discovery rate (FDR) - The rate at which claims of significance are false \(E\left[\frac{V}{R}\right]\)


Controlling the false positive rate

If P-values are correctly calculated calling all \(P < \alpha\) significant will control the false positive rate at level \(\alpha\) on average.

Problem: Suppose that you perform 10,000 tests and \(\beta = 0\) for all of them.

Suppose that you call all \(P < 0.05\) significant.

The expected number of false positives is: \(10,000 \times 0.05 = 500\) false positives.

How do we avoid so many false positives?


Controlling family-wise error rate (FWER)

The Bonferroni correction is the oldest multiple testing correction.

Basic idea:

  • Suppose you do \(m\) tests
  • You want to control FWER at level \(\alpha\) so \(Pr(V \geq 1) < \alpha\)
  • Calculate P-values normally
  • Set \(\alpha_{fwer} = \alpha/m\)
  • Call all \(P\)-values less than \(\alpha_{fwer}\) significant

Pros: Easy to calculate, conservative

Cons: May be very conservative


Controlling false discovery rate (FDR)

This is the most popular correction when performing lots of tests say in genomics, imaging, astronomy, or other signal-processing disciplines.

Basic idea:

  • Suppose you do \(m\) tests
  • You want to control FDR at level \(\alpha\) so \(E\left[\frac{V}{R}\right] \leq \alpha\)
  • Calculate P-values normally
  • Order the P-values from smallest to largest \(P_{(1)},...,P_{(m)}\)
  • Call any \(P_{(i)} \leq \alpha \times \frac{i}{m}\) significant

Pros: Still pretty easy to calculate, less conservative (maybe much less)

Cons: Allows for more false positives, may behave strangely under dependence


Adjusted P-values

  • One approach is to adjust the threshold \(\alpha\)
  • A different approach is to calculate “adjusted p-values”
  • They are not p-values anymore
  • But they can be used directly without adjusting \(\alpha\)

Example:

  • Suppose P-values are \(P_1,\ldots,P_m\)
  • You could adjust them by taking \(P_i^{fwer} = \min\{m \times P_i, 1\}\) for each P-value.
  • Then if you call all \(P_i^{fwer} < \alpha\) significant you will control the FWER.


Case study I: no true positives

# Simulate 1000 regressions in which y is generated independently of x, so
# every slope called significant is a false positive.
set.seed(1010093)
pValues <- rep(NA_real_, 1000)  # preallocate as double rather than logical NA
for (i in seq_along(pValues)) {
  # y is drawn before x, keeping the random number stream in its original order
  y <- rnorm(20)
  x <- rnorm(20)
  # Row 2, column 4 of the coefficient table is the slope's p-value;
  # `coefficients` is spelled out instead of the partially matched `coeff`
  pValues[i] <- summary(lm(y ~ x))$coefficients[2, 4]
}

# Controls false positive rate
sum(pValues < 0.05)
## [1] 51

Case study I: no true positives

# Controls FWER: count Bonferroni-adjusted p-values below 0.05
sum(p.adjust(p = pValues, method = "bonferroni") < 0.05)
## [1] 0
# Controls FDR: count Benjamini-Hochberg-adjusted p-values below 0.05
sum(p.adjust(p = pValues, method = "BH") < 0.05)
## [1] 0

Case study II: 50% true positives

# Simulate 1000 regressions: the first 500 have no relationship between x and
# y, the last 500 have a true slope of 2.
set.seed(1010093)
pValues <- rep(NA_real_, 1000)  # preallocate as double rather than logical NA
for (i in seq_along(pValues)) {
  x <- rnorm(20)
  # First 500 beta=0, last 500 beta=2
  if (i <= 500) {
    y <- rnorm(20)
  } else {
    y <- rnorm(20, mean = 2 * x)
  }
  # Row 2, column 4 of the coefficient table is the slope's p-value;
  # `coefficients` is spelled out instead of the partially matched `coeff`
  pValues[i] <- summary(lm(y ~ x))$coefficients[2, 4]
}
# Ground truth for each test, in the same order as the simulation
trueStatus <- rep(c("zero", "not zero"), each = 500)
table(pValues < 0.05, trueStatus)
##        trueStatus
##         not zero zero
##   FALSE        0  476
##   TRUE       500   24

Case study II: 50% true positives

# Controls FWER: Bonferroni-adjusted calls cross-tabulated against the truth
table(p.adjust(p = pValues, method = "bonferroni") < 0.05, trueStatus)
##        trueStatus
##         not zero zero
##   FALSE       23  500
##   TRUE       477    0
# Controls FDR: Benjamini-Hochberg-adjusted calls against the truth
table(p.adjust(p = pValues, method = "BH") < 0.05, trueStatus)
##        trueStatus
##         not zero zero
##   FALSE        0  487
##   TRUE       500   13

Case study II: 50% true positives

P-values versus adjusted P-values

# Two panels side by side: raw p-values against their adjusted versions
par(mfrow = c(1, 2))
# Left panel: Bonferroni adjustment
plot(x = pValues, y = p.adjust(pValues, method = "bonferroni"), pch = 19)
# Right panel: Benjamini-Hochberg adjustment
plot(x = pValues, y = p.adjust(pValues, method = "BH"), pch = 19)


Notes and resources

Notes:

  • Multiple testing is an entire subfield
  • A basic Bonferroni/BH correction is usually enough
  • If there is strong dependence between tests there may be problems
  • Consider method="BY"

Further resources:

  • Multiple testing procedures with applications to genomics
  • Statistical significance for genome-wide studies
  • Introduction to multiple testing


Resampled inference

The jackknife

  • The jackknife is a tool for estimating standard errors and the bias of estimators
  • As its name suggests, the jackknife is a small, handy tool; in contrast to the bootstrap, which is then the moral equivalent of a giant workshop full of tools
  • Both the jackknife and the bootstrap involve resampling data; that is, repeatedly creating new data sets from the original data

  • The jackknife deletes each observation and calculates an estimate based on the remaining \(n-1\) of them
  • It uses this collection of estimates to do things like estimate the bias and the standard error
  • Note that estimating the bias and having a standard error are not needed for things like sample means, which we know are unbiased estimates of population means and what their standard errors are

  • We’ll consider the jackknife for univariate data
  • Let \(X_1,\ldots,X_n\) be a collection of data used to estimate a parameter \(\theta\)
  • Let \(\hat \theta\) be the estimate based on the full data set
  • Let \(\hat \theta_{i}\) be the estimate of \(\theta\) obtained by deleting observation \(i\)
  • Let \(\bar \theta = \frac{1}{n}\sum_{i=1}^n \hat \theta_{i}\)

  • Then, the jackknife estimate of the bias is \[ (n - 1) \left(\bar \theta - \hat \theta\right) \] (how far the average delete-one estimate is from the actual estimate)
  • The jackknife estimate of the standard error is \[ \left[\frac{n-1}{n}\sum_{i=1}^n (\hat \theta_i - \bar\theta )^2\right]^{1/2} \] (the deviance of the delete-one estimates from the average delete-one estimate)

Example

  • We want to estimate the bias and standard error of the median
# Jackknife estimates of the bias and standard error of the sample median of
# the sons' heights.
library(UsingR)
data(father.son)
x <- father.son$sheight
n <- length(x)
theta <- median(x)
# Delete-one medians. vapply() fixes the result to one double per observation,
# whereas sapply() picks its return type from whatever the results look like.
jk <- vapply(seq_len(n), function(i) median(x[-i]), numeric(1))
thetaBar <- mean(jk)
# Bias: (n - 1) times the gap between the mean delete-one and full estimates
biasEst <- (n - 1) * (thetaBar - theta)
# SE: sqrt((n - 1) / n * sum of squared deviations), written with mean()
seEst <- sqrt((n - 1) * mean((jk - thetaBar)^2))

c(biasEst, seEst)
## [1] 0.0000000 0.1014066
# Cross-check using jackknife() from the bootstrap package
library(bootstrap)
temp <- jackknife(x, median)
# Pull the bias and standard error components out by their full names
c(temp[["jack.bias"]], temp[["jack.se"]])
## [1] 0.0000000 0.1014066

  • Both methods (of course) yield an estimated bias of 0 and a se of 0.1014066
  • Odd little fact: the jackknife estimate of the bias for the median is always \(0\) when the number of observations is even
  • It has been shown that the jackknife is a linear approximation to the bootstrap
  • Generally do not use the jackknife for sample quantiles like the median; as it has been shown to have some poor properties

Pseudo observations

  • Another interesting way to think about the jackknife uses pseudo observations
  • Let \[ \mbox{Pseudo Obs} = n \hat \theta - (n - 1) \hat \theta_{i} \]
  • Think of these as “whatever observation \(i\) contributes to the estimate of \(\theta\)”
  • Note when \(\hat \theta\) is the sample mean, the pseudo observations are the data themselves
  • Then the sample standard error of these observations is the previous jackknife estimated standard error.
  • The mean of these observations is a bias-corrected estimate of \(\theta\)

The bootstrap

  • The bootstrap is a tremendously useful tool for constructing confidence intervals and calculating standard errors for difficult statistics
  • For example, how would one derive a confidence interval for the median?
  • The bootstrap procedure follows from the so called bootstrap principle

The bootstrap principle

  • Suppose that I have a statistic that estimates some population parameter, but I don’t know its sampling distribution
  • The bootstrap principle suggests using the distribution defined by the data to approximate its sampling distribution

The bootstrap in practice

  • In practice, the bootstrap principle is always carried out using simulation
  • We will cover only a few aspects of bootstrap resampling
  • The general procedure follows by first simulating complete data sets from the observed data with replacement

  • This is approximately drawing from the sampling distribution of that statistic, at least as far as the data is able to approximate the true population distribution

  • Calculate the statistic for each simulated data set
  • Use the simulated statistics to either define a confidence interval or take the standard deviation to calculate a standard error


Nonparametric bootstrap algorithm example

  • Bootstrap procedure for calculating confidence interval for the median from a data set of \(n\) observations
  1. Sample \(n\) observations with replacement from the observed data resulting in one simulated complete data set

  2. Take the median of the simulated data set

  3. Repeat these two steps \(B\) times, resulting in \(B\) simulated medians

  4. These medians are approximately drawn from the sampling distribution of the median of \(n\) observations; therefore we can

    • Draw a histogram of them
    • Calculate their standard deviation to estimate the standard error of the median
    • Take the \(2.5^{th}\) and \(97.5^{th}\) percentiles as a confidence interval for the median

Example code

# Bootstrap the median: every row of `resamples` is one data set of n values
# drawn with replacement from x
B <- 1000
resamples <- matrix(
  sample(x, size = n * B, replace = TRUE),
  nrow = B, ncol = n
)
# One median per resampled row
medians <- apply(resamples, MARGIN = 1, FUN = median)
# Standard deviation of the resampled medians
sd(medians)
## [1] 0.08465921
# 2.5th and 97.5th percentiles of the resampled medians
quantile(medians, probs = c(0.025, 0.975))
##     2.5%    97.5% 
## 68.41383 68.81415

Histogram of bootstrap resamples

# Histogram of the values stored in `medians`
hist(medians)


Notes on the bootstrap

  • The bootstrap is non-parametric
  • Better percentile bootstrap confidence intervals correct for bias
  • There are lots of variations on bootstrap procedures; the book “An Introduction to the Bootstrap” by Efron and Tibshirani is a great place to start for both bootstrap and jackknife information
# Bootstrap the median with the boot package
library(boot)
# Statistic in the form boot() expects: the data plus a vector of indices
# selecting the resampled observations
stat <- function(x, i) {
  median(x[i])
}
# NOTE(review): gmVol is not defined in this section; presumably a numeric
# vector created elsewhere -- confirm before running
boot.out <- boot(data = gmVol, statistic = stat, R = 1000)
boot.ci(boot.out)

Group comparisons

  • Consider comparing two independent groups.
  • Example, comparing sprays B and C
# Load the InsectSprays data and draw one box of `count` per `spray` level
data(InsectSprays)
boxplot(count ~ spray, data = InsectSprays)


Permutation tests

  • Consider the null hypothesis that the distribution of the observations from each group is the same
  • Then, the group labels are irrelevant
  • We then discard the group levels and permute the combined data
  • Split the permuted data into two groups with \(n_A\) and \(n_B\) observations (say by always treating the first \(n_A\) observations as the first group)
  • Evaluate the probability of getting a statistic as large or larger than the one observed
  • An example statistic would be the difference in the averages between the two groups; one could also use a t-statistic

Variations on permutation testing

Data type Statistic Test name
Ranks rank sum rank sum test
Binary hypergeometric prob Fisher’s exact test
Raw data ordinary permutation test

  • Also, so-called randomization tests are exactly permutation tests, with a different motivation.
  • For matched data, one can randomize the signs
  • For ranks, this results in the signed rank test
  • Permutation strategies work for regression as well
  • Permuting a regressor of interest
  • Permutation tests work very well in multivariate settings

Permutation test for pesticide data

# Permutation test of the difference in mean counts between sprays B and C
subdata <- InsectSprays[InsectSprays$spray %in% c("B", "C"), ]
y <- subdata$count
group <- as.character(subdata$spray)
# Test statistic: mean of w in group "B" minus mean of w in group "C"
testStat <- function(w, g) mean(w[g == "B"]) - mean(w[g == "C"])
observedStat <- testStat(y, group)
# Recompute the statistic under 10000 random relabelings. vapply() guarantees
# a plain numeric vector, where sapply() infers the return type from the data.
permutations <- vapply(
  seq_len(10000),
  function(i) testStat(y, sample(group)),
  numeric(1)
)
observedStat
## [1] 13.25
# Share of permuted statistics that exceed the observed one
mean(permutations > observedStat)
## [1] 0

Histogram of permutations

# Histogram of the values stored in `permutations`
hist(permutations)


Regression Models


Introduction

Finding the middle via least squares

data

# Load the galton data set from the UsingR package
library(UsingR)
data(galton)
# Histograms of the child and parent heights, side by side
par(mfrow = c(1, 2))
hist(galton$child, col = "blue", breaks = 100)
hist(galton$parent, col = "blue", breaks = 100)


  • Consider only the children’s heights.
  • How could one describe the “middle”?
  • One definition, let \(Y_i\) be the height of child \(i\) for \(i = 1, \ldots, n = 928\), then define the middle as the value of \(\mu\) that minimizes \[\sum_{i=1}^n (Y_i - \mu)^2\]
  • This is the physical center of mass of the histogram.
  • You might have guessed that the answer is \(\mu = \bar Y\).

Use R studio’s manipulate to see what value of \(\mu\) minimizes the sum of the squared deviations.

library(manipulate)
# Draw the histogram of galton$child with a red vertical segment at `mu`, and
# annotate it with `mu` and the mean squared deviation of the heights from it.
#
# mu: candidate value for the "middle" of the child heights.
myHist <- function(mu) {
  # Mean squared deviation of the child heights around the candidate value
  avg_sq_dev <- mean((galton$child - mu)^2)
  hist(galton$child, col = "blue", breaks = 100)
  # Segment from height 0 to 150 at x = mu
  lines(c(mu, mu), c(0, 150), col = "red", lwd = 5)
  text(63, 150, paste("mu = ", mu))
  text(63, 140, paste("MSE = ", round(avg_sq_dev, 2)))
}
# Re-draw myHist() as the slider moves mu between 62 and 74 in steps of 0.5
manipulate(myHist(mu), mu = slider(62, 74, step = 0.5))

The least squares estimate is the empirical mean

  # Histogram of child heights with the empirical mean marked in red
  hist(galton$child, col = "blue", breaks = 100)
  meanChild <- mean(galton$child)
  # `length.out` is spelled out instead of the partially matched `length`
  lines(rep(meanChild, 100), seq(0, 150, length.out = 100),
        col = "red", lwd = 5)


The math follows as:

\[ \begin{align} \sum_{i=1}^n (Y_i - \mu)^2 & = \ \sum_{i=1}^n (Y_i - \bar Y + \bar Y - \mu)^2 \\ & = \sum_{i=1}^n (Y_i - \bar Y)^2 + \ 2 \sum_{i=1}^n (Y_i - \bar Y) (\bar Y - \mu) +\ \sum_{i=1}^n (\bar Y - \mu)^2 \\ & = \sum_{i=1}^n (Y_i - \bar Y)^2 + \ 2 (\bar Y - \mu) \sum_{i=1}^n (Y_i - \bar Y) +\ \sum_{i=1}^n (\bar Y - \mu)^2 \\ & = \sum_{i=1}^n (Y_i - \bar Y)^2 + \ 2 (\bar Y - \mu) (\sum_{i=1}^n Y_i - n \bar Y) +\ \sum_{i=1}^n (\bar Y - \mu)^2 \\ & = \sum_{i=1}^n (Y_i - \bar Y)^2 + \sum_{i=1}^n (\bar Y - \mu)^2\\ & \geq \sum_{i=1}^n (Y_i - \bar Y)^2 \ \end{align} \]


Comparing childrens’ heights and their parents’ heights

# Scatterplot of child height (vertical axis) against parent height
plot(galton$parent,galton$child,pch=19,col="blue")


  • Size of point represents number of points at that (X, Y) combination.
# Count how often each (child, parent) height pair occurs
freqData <- as.data.frame(table(galton$child, galton$parent))
names(freqData) <- c("child", "parent", "freq")
# The tabulated heights are stored as labels, so convert them back to numbers;
# the point size grows with the frequency of the pair
with(freqData, plot(
  as.numeric(as.character(parent)),
  as.numeric(as.character(child)),
  pch = 21, col = "black", bg = "lightblue",
  cex = 0.05 * freq,
  xlab = "parent", ylab = "child"
))


Regression through the origin

  • Suppose that \(X_i\) are the parents’ heights.
  • Consider picking the slope \(\beta\) that minimizes \[\sum_{i=1}^n (Y_i - X_i \beta)^2\]
  • This is exactly using the origin as a pivot point picking the line that minimizes the sum of the squared vertical distances of the points to the line
  • Use R studio’s manipulate function to experiment
  • Subtract the means so that the origin is the mean of the parent and children’s heights

# Plot the centred parent and child heights (point size grows with frequency),
# overlay the line through the origin with slope `beta`, and show that line's
# mean squared error in the title.
#
# beta: slope of the candidate regression-through-the-origin line.
myPlot <- function(beta) {
  y <- galton$child - mean(galton$child)
  x <- galton$parent - mean(galton$parent)
  # Tabulate child (y) first so the column names assigned below match their
  # contents; table(x, y) would file the parent values under "child" and
  # transpose the scatter relative to the line drawn by abline().
  freqData <- as.data.frame(table(y, x))
  names(freqData) <- c("child", "parent", "freq")
  plot(
    as.numeric(as.vector(freqData$parent)),
    as.numeric(as.vector(freqData$child)),
    pch = 21, col = "black", bg = "lightblue",
    cex = .15 * freqData$freq,
    xlab = "parent",
    ylab = "child"
  )
  # Candidate line through the origin, plus a marker at the origin itself
  abline(0, beta, lwd = 3)
  points(0, 0, cex = 2, pch = 19)
  # Mean squared vertical distance of the child heights from the line
  mse <- mean((y - beta * x)^2)
  title(paste("beta = ", beta, "mse = ", round(mse, 3)))
}
# Re-draw myPlot() as the slider moves beta between 0.6 and 1.2 in steps of 0.02
manipulate(myPlot(beta), beta = slider(0.6, 1.2, step = 0.02))

The solution

# Regress mean-centred child height on mean-centred parent height; the `- 1`
# drops the intercept, so the fitted line passes through the origin
lm(I(child - mean(child))~ I(parent - mean(parent)) - 1, data = galton)
## 
## Call:
## lm(formula = I(child - mean(child)) ~ I(parent - mean(parent)) - 
##     1, data = galton)
## 
## Coefficients:
## I(parent - mean(parent))  
##                   0.6463

Visualizing the best fit line

  • Size of points are frequencies at that X, Y combination
# Frequency-scaled scatter of child against parent height with the fitted
# least squares line on top
freqData <- as.data.frame(table(galton$child, galton$parent))
names(freqData) <- c("child", "parent", "freq")
plot(as.numeric(as.vector(freqData$parent)),
     as.numeric(as.vector(freqData$child)),
     pch = 21, col = "black", bg = "lightblue",
     cex = .05 * freqData$freq,
     xlab = "parent", ylab = "child")
lm1 <- lm(galton$child ~ galton$parent)
# `fitted.values` is spelled out; `lm1$fitted` only worked through `$`
# partial matching
lines(galton$parent, lm1$fitted.values, col = "red", lwd = 3)


Notations

Some basic definitions

  • In this module, we’ll cover some basic definitions and notation used throughout the class.
  • We will try to minimize the amount of mathematics required for this class.
  • No calculus is required.

Notation for data

  • We write \(X_1, X_2, \ldots, X_n\) to describe \(n\) data points.
  • As an example, consider the data set \(\{1, 2, 5\}\) then
  • \(X_1 = 1\), \(X_2 = 2\), \(X_3 = 5\) and \(n = 3\).
  • We often use a different letter than \(X\), such as \(Y_1, \ldots , Y_n\).
  • We will typically use Greek letters for things we don’t know. Such as, \(\mu\) is a mean that we’d like to estimate.
  • We will use capital letters for conceptual values of the variables and lowercase letters for realized values.
  • So this way we can write \(P(X_i > x)\).
  • \(X_i\) is a conceptual random variable.
  • \(x\) is a number that we plug into.

The empirical mean

  • Define the empirical mean as \[ \bar X = \frac{1}{n}\sum_{i=1}^n X_i. \]
  • Notice if we subtract the mean from data points, we get data that has mean 0. That is, if we define \[ \tilde X_i = X_i - \bar X. \] Then the mean of the \(\tilde X_i\) is 0.
  • This process is called “centering” the random variables.
  • The mean is a measure of central tendency of the data.
  • Recall from the previous lecture that the mean is the least squares solution for minimizing \[ \sum_{i=1}^n (X_i - \mu)^2 \]

The empirical standard deviation and variance

  • Define the empirical variance as \[ S^2 = \frac{1}{n-1} \sum_{i=1}^n (X_i - \bar X)^2 = \frac{1}{n-1} \left( \sum_{i=1}^n X_i^2 - n \bar X ^ 2 \right) \]
  • The empirical standard deviation is defined as \(S = \sqrt{S^2}\). Notice that the standard deviation has the same units as the data.
  • The data defined by \(X_i / s\) have empirical standard deviation 1. This is called “scaling” the data.
  • The empirical standard deviation is a measure of spread.
  • Sometimes people divide by \(n\) rather than \(n-1\) (the latter produces an unbiased estimate.)

Normalization

  • Then the data defined by \[ Z_i = \frac{X_i - \bar X}{s} \] have empirical mean zero and empirical standard deviation 1.
  • The process of centering then scaling the data is called “normalizing” the data.
  • Normalized data are centered at 0 and have units equal to standard deviations of the original data.
  • Example, a value of 2 from normalized data means that data point was two standard deviations larger than the mean.

The empirical covariance

  • Consider now when we have pairs of data, \((X_i, Y_i)\).
  • Their empirical covariance is \[ Cov(X, Y) = \frac{1}{n-1}\sum_{i=1}^n (X_i - \bar X) (Y_i - \bar Y) = \frac{1}{n-1}\left( \sum_{i=1}^n X_i Y_i - n \bar X \bar Y\right) \]
  • Some people prefer to divide by \(n\) rather than \(n-1\) (the latter produces an unbiased estimate.)
  • The correlation is defined as \[ Cor(X, Y) = \frac{Cov(X, Y)}{S_x S_y} \] where \(S_x\) and \(S_y\) are the estimates of standard deviations for the \(X\) observations and \(Y\) observations, respectively.

Some facts about correlation

  • \(Cor(X, Y) = Cor(Y, X)\)
  • \(-1 \leq Cor(X, Y) \leq 1\)
  • \(Cor(X,Y) = 1\) and \(Cor(X, Y) = -1\) only when the \(X\) and \(Y\) observations fall perfectly on a positive or negative sloped line, respectively.
  • \(Cor(X, Y)\) measures the strength of the linear relationship between the \(X\) and \(Y\) data, with stronger relationships as \(Cor(X,Y)\) heads towards -1 or 1.
  • \(Cor(X, Y) = 0\) implies no linear relationship.

Least squares estimation of regression lines

Regression via least squares

General least squares for linear equations

Consider again the parent and child height data from Galton

library(UsingR)
data(galton)
# Count how often each (child, parent) height combination occurs.
freqData <- as.data.frame(table(galton$child, galton$parent))
names(freqData) <- c("child", "parent", "freq")
# Bubble plot: each circle's size is proportional to that frequency.
with(freqData, plot(
  x = as.numeric(as.character(parent)),
  y = as.numeric(as.character(child)),
  pch = 21, col = "black", bg = "lightblue",
  cex = .05 * freq,
  xlab = "parent", ylab = "child"
))


Fitting the best line

  • Let \(Y_i\) be the \(i^{th}\) child’s height and \(X_i\) be the \(i^{th}\) (average over the pair of) parents’ heights.
  • Consider finding the best line
  • Child’s Height = \(\beta_0\) + Parent’s Height \(\beta_1\)
  • Use least squares \[ \sum_{i=1}^n \{Y_i - (\beta_0 + \beta_1 X_i)\}^2 \]

Let’s solve this problem generally

  • Let \(\mu_i = \beta_0 + \beta_1 X_i\) and our estimates be \(\hat \mu_i = \hat \beta_0 + \hat \beta_1 X_i\).
  • We want to minimize \[ \dagger \sum_{i=1}^n (Y_i - \mu_i)^2 = \sum_{i=1}^n (Y_i - \hat \mu_i) ^ 2 + 2 \sum_{i=1}^n (Y_i - \hat \mu_i) (\hat \mu_i - \mu_i) + \sum_{i=1}^n (\hat \mu_i - \mu_i)^2\]
  • Suppose that \[\sum_{i=1}^n (Y_i - \hat \mu_i) (\hat \mu_i - \mu_i) = 0\] then \[ \dagger =\sum_{i=1}^n (Y_i - \hat \mu_i) ^ 2 + \sum_{i=1}^n (\hat \mu_i - \mu_i)^2\geq \sum_{i=1}^n (Y_i - \hat \mu_i) ^ 2\]

Mean only regression

  • So we know that if: \[ \sum_{i=1}^n (Y_i - \hat \mu_i) (\hat \mu_i - \mu_i) = 0\] where \(\mu_i = \beta_0 + \beta_1 X_i\) and \(\hat \mu_i = \hat \beta_0 + \hat \beta_1 X_i\) then the line \[Y = \hat \beta_0 + \hat \beta_1 X\] is the least squares line.
  • Consider forcing \(\beta_1 = 0\) and thus \(\hat \beta_1=0\); that is, only considering horizontal lines
  • The solution works out to be \[\hat \beta_0 = \bar Y.\]

Let’s show it

\[\begin{align} \ \sum_{i=1}^n (Y_i - \hat \mu_i) (\hat \mu_i - \mu_i) = & \sum_{i=1}^n (Y_i - \hat \beta_0) (\hat \beta_0 - \beta_0) \\ = & (\hat \beta_0 - \beta_0) \sum_{i=1}^n (Y_i - \hat \beta_0) \ \end{align} \]

Thus, this will equal 0 if \(\sum_{i=1}^n (Y_i - \hat \beta_0) = n\bar Y - n \hat \beta_0=0\)

Thus \(\hat \beta_0 = \bar Y.\)


Regression through the origin

  • Recall that if: \[ \sum_{i=1}^n (Y_i - \hat \mu_i) (\hat \mu_i - \mu_i) = 0\] where \(\mu_i = \beta_0 + \beta_1 X_i\) and \(\hat \mu_i = \hat \beta_0 + \hat \beta_1 X_i\) then the line \[Y = \hat \beta_0 + \hat \beta_1 X\] is the least squares line.
  • Consider forcing \(\beta_0 = 0\) and thus \(\hat \beta_0=0\); that is, only considering lines through the origin
  • The solution works out to be \[\hat \beta_1 = \frac{\sum_{i=1}^n Y_i X_i}{\sum_{i=1}^n X_i^2}.\]

Let’s show it

\[\begin{align} \ \sum_{i=1}^n (Y_i - \hat \mu_i) (\hat \mu_i - \mu_i) = & \sum_{i=1}^n (Y_i - \hat \beta_1 X_i) (\hat \beta_1 X_i - \beta_1 X_i) \\ = & (\hat \beta_1 - \beta_1) \sum_{i=1}^n (Y_i X_i - \hat \beta_1 X_i ^2) \ \end{align} \]

Thus, this will equal 0 if \(\sum_{i=1}^n (Y_i X_i - \hat \beta_1 X_i ^2) = \sum_{i=1}^n Y_i X_i - \hat \beta_1 \sum_{i=1}^n X_i^2 =0\)

Thus \[\hat \beta_1 = \frac{\sum_{i=1}^n Y_i X_i}{\sum_{i=1}^n X_i^2}.\]


Recapping what we know

  • If we define \(\mu_i = \beta_0\) then \(\hat \beta_0 = \bar Y\).
  • If we only look at horizontal lines, the least squares estimate of the intercept of that line is the average of the outcomes.
  • If we define \(\mu_i = X_i \beta_1\) then \(\hat \beta_1 = \frac{\sum_{i=1}^n Y_i X_i}{\sum_{i=1}^n X_i^2}\)
  • If we only look at lines through the origin, we get the estimated slope is the cross product of the X and Ys divided by the cross product of the Xs with themselves.
  • What about when \(\mu_i = \beta_0 + \beta_1 X_i\)? That is, we don’t want to restrict ourselves to horizontal lines or lines through the origin.

\[\begin{align} \ \sum_{i=1}^n (Y_i - \hat \mu_i) (\hat \mu_i - \mu_i) = & \sum_{i=1}^n (Y_i - \hat\beta_0 - \hat\beta_1 X_i) (\hat \beta_0 + \hat \beta_1 X_i - \beta_0 - \beta_1 X_i) \\ = & (\hat \beta_0 - \beta_0) \sum_{i=1}^n (Y_i - \hat\beta_0 - \hat \beta_1 X_i) + (\hat \beta_1 - \beta_1)\sum_{i=1}^n (Y_i - \hat\beta_0 - \hat \beta_1 X_i)X_i\\ \end{align} \] Note that

\[0=\sum_{i=1}^n (Y_i - \hat\beta_0 - \hat \beta_1 X_i) = n \bar Y - n \hat \beta_0 - n \hat \beta_1 \bar X ~~\mbox{implies that}~~\hat \beta_0 = \bar Y - \hat \beta_1 \bar X \]

Then \[\sum_{i=1}^n (Y_i - \hat\beta_0 - \hat \beta_1 X_i) X_i = \sum_{i=1}^n (Y_i - \bar Y + \hat \beta_1 \bar X - \hat \beta_1 X_i)X_i\]


\[=\sum_{i=1}^n \{(Y_i - \bar Y) - \hat \beta_1 (X_i - \bar X) \}X_i\] And thus \[ \sum_{i=1}^n (Y_i - \bar Y)X_i - \hat \beta_1 \sum_{i=1}^n (X_i - \bar X) X_i = 0.\] So we arrive at \[ \hat \beta_1 = \frac{\sum_{i=1}^n (Y_i - \bar Y)X_i}{\sum_{i=1}^n (X_i - \bar X) X_i} = \frac{\sum_{i=1}^n (Y_i - \bar Y)(X_i - \bar X)}{\sum_{i=1}^n (X_i - \bar X) (X_i - \bar X)} = Cor(Y, X) \frac{Sd(Y)}{Sd(X)}. \] And recall \[ \hat \beta_0 = \bar Y - \hat \beta_1 \bar X. \]


Consequences

  • The least squares model fit to the line \(Y = \beta_0 + \beta_1 X\) through the data pairs \((X_i, Y_i)\) with \(Y_i\) as the outcome obtains the line \(Y = \hat \beta_0 + \hat \beta_1 X\) where \[\hat \beta_1 = Cor(Y, X) \frac{Sd(Y)}{Sd(X)} ~~~ \hat \beta_0 = \bar Y - \hat \beta_1 \bar X\]
  • \(\hat \beta_1\) has the units of \(Y / X\), \(\hat \beta_0\) has the units of \(Y\).
  • The line passes through the point \((\bar X, \bar Y)\)
  • The slope of the regression line with \(X\) as the outcome and \(Y\) as the predictor is \(Cor(Y, X) Sd(X)/ Sd(Y)\).
  • The slope is the same one you would get if you centered the data, \((X_i - \bar X, Y_i - \bar Y)\), and did regression through the origin.
  • If you normalized the data, \(\{ \frac{X_i - \bar X}{Sd(X)}, \frac{Y_i - \bar Y}{Sd(Y)}\}\), the slope is \(Cor(Y, X)\).

Revisiting Galton’s data

  • Double check our calculations using R
# Closed-form least squares estimates with child height (y) as the outcome
# and parent height (x) as the predictor.
y <- galton$child
x <- galton$parent
beta1 <- cor(y, x) *  sd(y) / sd(x)
beta0 <- mean(y) - beta1 * mean(x)
# Row 1: estimates computed by hand; row 2: estimates from lm()
rbind(c(beta0, beta1), coef(lm(y ~ x)))
##      (Intercept)         x
## [1,]    23.94153 0.6462906
## [2,]    23.94153 0.6462906
  • Reversing the outcome/predictor relationship
# Same formulas with the roles swapped: parent height (x) is now the outcome
# and child height (y) the predictor.
beta1 <- cor(y, x) *  sd(x) / sd(y)
beta0 <- mean(x) - beta1 * mean(y)
# Row 1: estimates computed by hand; row 2: estimates from lm()
rbind(c(beta0, beta1), coef(lm(x ~ y)))
##      (Intercept)         y
## [1,]    46.13535 0.3256475
## [2,]    46.13535 0.3256475
  • Regression through the origin yields an equivalent slope if you center the data first
# Center both variables at their means
yc <- y - mean(y)
xc <- x - mean(x)
# Regression-through-the-origin slope on the centered data
beta1 <- sum(yc * xc) / sum(xc ^ 2)
# Compare with the slope from the ordinary fit (with intercept)
c(beta1, coef(lm(y ~ x))[2])
##                   x 
## 0.6462906 0.6462906
  • Normalizing variables results in the slope being the correlation
# Normalize both variables: subtract the mean, divide by the standard deviation
yn <- (y - mean(y))/sd(y)
xn <- (x - mean(x))/sd(x)
# Correlation of the raw data, correlation of the normalized data, and the
# regression slope on the normalized data
c(cor(y, x), cor(yn, xn), coef(lm(yn ~ xn))[2])
##                            xn 
## 0.4587624 0.4587624 0.4587624

Plotting the fit

  • Size of points are frequencies at that X, Y combination.
  • For the red line the child is the outcome.
  • For the blue, the parent is the outcome (accounting for the fact that the response is plotted on the horizontal axis).
  • Black line assumes \(Cor(Y, X) = 1\) (slope is \(Sd(Y)/Sd(X)\)).
  • Big black dot is \((\bar X, \bar Y)\).

The code to add the lines

# Frequency of each (child, parent) height combination
freqData <- as.data.frame(table(galton$child, galton$parent))
names(freqData) <- c("child", "parent", "freq")
with(freqData, plot(
  x = as.numeric(as.character(parent)),
  y = as.numeric(as.character(child)),
  pch = 21, col = "black", bg = "lightblue",
  cex = .05 * freq,
  xlab = "parent", ylab = "child",
  xlim = c(62, 74), ylim = c(62, 74)
))
# Red: least squares line with y as the outcome
abline(
  a = mean(y) - mean(x) * cor(y, x) * sd(y) / sd(x),
  b = sd(y) / sd(x) * cor(y, x),
  lwd = 3, col = "red"
)
# Blue: x as the outcome, drawn with x still on the horizontal axis, which
# inverts the correlation factor in the slope
abline(
  a = mean(y) - mean(x) * sd(y) / sd(x) / cor(y, x),
  b = sd(y) / sd(x) / cor(y, x),
  lwd = 3, col = "blue"
)
# Black: the line obtained when the correlation factor is 1
abline(
  a = mean(y) - mean(x) * sd(y) / sd(x),
  b = sd(y) / sd(x),
  lwd = 2
)
# Large dot at the point of means
points(mean(x), mean(y), cex = 2, pch = 19)


Historical side note, Regression to Mediocrity

Regression to the mean

  • Invented by Francis Galton in the paper “Regression towards mediocrity in hereditary stature” The Journal of the Anthropological Institute of Great Britain and Ireland , Vol. 15, (1886).
  • Think of it this way, imagine if you simulated pairs of random normals
  • The largest first ones would be the largest by chance, and the probability that they are smaller for the second simulation is high.
  • In other words \(P(Y < x | X = x)\) gets bigger as \(x\) heads into the very large values.
  • Similarly \(P(Y > x | X = x)\) gets bigger as \(x\) heads to very small values.
  • Think of the regression line as the intrinsic part.
  • Unless \(Cor(Y, X) = 1\) the intrinsic part isn’t perfect

  • Suppose that we normalize \(X\) (parent’s height) and \(Y\) (child’s height) so that they both have mean 0 and variance 1.
  • Then, recall, our regression line passes through \((0, 0)\) (the mean of the X and Y).
  • The slope of the regression line is \(Cor(Y,X)\), regardless of which variable is the outcome (recall, both standard deviations are 1).
  • Notice if \(X\) is the outcome and you create a plot where \(X\) is the horizontal axis, the slope of the least squares line that you plot is \(1/Cor(Y, X)\).

Normalizing the data and setting plotting parameters

library(UsingR)
data(father.son)
# Normalize the son (y) and father (x) heights: subtract the mean and divide
# by the standard deviation.
y <- with(father.son, (sheight - mean(sheight)) / sd(sheight))
x <- with(father.son, (fheight - mean(fheight)) / sd(fheight))
rho <- cor(x, y)

# Scatterplot of normalized heights on fixed [-3, 3] axes, without a box.
#   x: values for the horizontal axis (labelled as the father's height)
#   y: values for the vertical axis (labelled as the son's height)
myPlot <- function(x, y) {
  plot(
    x, y,
    pch = 21, cex = 1.1, col = "black", bg = "lightblue",
    xlim = c(-3, 3), ylim = c(-3, 3),
    xlab = "Father's height, normalized",
    ylab = "Son's height, normalized",
    frame.plot = FALSE
  )
}

Plot the data

# Normalized father/son heights with the candidate lines through the origin
myPlot(x, y)
abline(a = 0, b = 1)                 # if there were perfect correlation
abline(a = 0, b = rho, lwd = 2)      # father predicts son
abline(a = 0, b = 1 / rho, lwd = 2)  # son predicts father, son on vertical axis
abline(h = 0, v = 0)                 # reference lines for no relationship


Discussion

  • If you had to predict a son’s normalized height, it would be \(Cor(Y, X) * X_i\)
  • If you had to predict a father’s normalized height, it would be \(Cor(Y, X) * Y_i\)
  • Multiplication by this correlation shrinks toward 0 (regression toward the mean)
  • If the correlation is 1 there is no regression to the mean (if father’s height perfectly determines child’s height and vice versa)
  • Note, regression to the mean has been thought about quite a bit and generalized

Statistical linear regression models

Basic regression model with additive Gaussian errors.

  • Least squares is an estimation tool, how do we do inference?
  • Consider developing a probabilistic model for linear regression \[ Y_i = \beta_0 + \beta_1 X_i + \epsilon_{i} \]
  • Here the \(\epsilon_{i}\) are assumed iid \(N(0, \sigma^2)\).
  • Note, \(E[Y_i ~|~ X_i = x_i] = \mu_i = \beta_0 + \beta_1 x_i\)
  • Note, \(Var(Y_i ~|~ X_i = x_i) = \sigma^2\).
  • Likelihood equivalent model specification is that the \(Y_i\) are independent \(N(\mu_i, \sigma^2)\).

  • Likelihood

\[ {\cal L}(\beta, \sigma) = \prod_{i=1}^n \left\{(2 \pi \sigma^2)^{-1/2}\exp\left(-\frac{1}{2\sigma^2}(y_i - \mu_i)^2 \right) \right\} \] so that, dropping the additive constant \(n\log(2\pi)\), twice the negative log (base e) likelihood is \[ -2 \log\{ {\cal L}(\beta, \sigma) \} = \frac{1}{\sigma^2} \sum_{i=1}^n (y_i - \mu_i)^2 + n\log(\sigma^2) \]

Discussion

  • Maximizing the likelihood is the same as minimizing -2 log likelihood
  • The least squares estimate for \(\mu_i = \beta_0 + \beta_1 x_i\) is exactly the maximum likelihood estimate (regardless of \(\sigma\))


  • Model \(Y_i = \mu_i + \epsilon_i = \beta_0 + \beta_1 X_i + \epsilon_i\) where \(\epsilon_i\) are iid \(N(0, \sigma^2)\)
  • ML estimates of \(\beta_0\) and \(\beta_1\) are the least squares estimates \[\hat \beta_1 = Cor(Y, X) \frac{Sd(Y)}{Sd(X)} ~~~ \hat \beta_0 = \bar Y - \hat \beta_1 \bar X\]
  • \(E[Y ~|~ X = x] = \beta_0 + \beta_1 x\)
  • \(Var(Y ~|~ X = x) = \sigma^2\)

Interpreting regression coefficients, the intercept

  • \(\beta_0\) is the expected value of the response when the predictor is 0 \[ E[Y | X = 0] = \beta_0 + \beta_1 \times 0 = \beta_0 \]
  • Note, this isn’t always of interest, for example when \(X=0\) is impossible or far outside of the range of data. (X is blood pressure, or height etc.)
  • Consider that \[ Y_i = \beta_0 + \beta_1 X_i + \epsilon_i = \beta_0 + a \beta_1 + \beta_1 (X_i - a) + \epsilon_i = \tilde \beta_0 + \beta_1 (X_i - a) + \epsilon_i \] So, shifting your \(X\) values by value \(a\) changes the intercept, but not the slope.
  • Often \(a\) is set to \(\bar X\) so that the intercept is interpreted as the expected response at the average \(X\) value.

Interpreting regression coefficients, the slope

  • \(\beta_1\) is the expected change in response for a 1 unit change in the predictor \[ E[Y ~|~ X = x+1] - E[Y ~|~ X = x] = \beta_0 + \beta_1 (x + 1) - (\beta_0 + \beta_1 x ) = \beta_1 \]
  • Consider the impact of changing the units of \(X\). \[ Y_i = \beta_0 + \beta_1 X_i + \epsilon_i = \beta_0 + \frac{\beta_1}{a} (X_i a) + \epsilon_i = \beta_0 + \tilde \beta_1 (X_i a) + \epsilon_i \]
  • Therefore, multiplication of \(X\) by a factor \(a\) results in dividing the coefficient by a factor of \(a\).
  • Example: \(X\) is height in \(m\) and \(Y\) is weight in \(kg\). Then \(\beta_1\) is \(kg/m\). Converting \(X\) to \(cm\) implies multiplying \(X\) by \(100 cm/m\). To get \(\beta_1\) in the right units, we have to divide by \(100 cm /m\) to get it to have the right units. \[ X m \times \frac{100cm}{m} = (100 X) cm ~~\mbox{and}~~ \beta_1 \frac{kg}{m} \times\frac{1 m}{100cm} = \left(\frac{\beta_1}{100}\right)\frac{kg}{cm} \]

Using regression coefficients for prediction

  • If we would like to guess the outcome at a particular value of the predictor, say \(X\), the regression model guesses \[ \hat \beta_0 + \hat \beta_1 X \]
  • Note that at the observed value of \(X\)s, we obtain the predictions \[ \hat \mu_i = \hat Y_i = \hat \beta_0 + \hat \beta_1 X_i \]
  • Remember that least squares minimizes \[ \sum_{i=1}^n (Y_i - \mu_i)^2 \] for \(\mu_i\) expressed as points on a line

Example

  • diamond data set from UsingR

Data is diamond prices (Singapore dollars) and diamond weight in carats (standard measure of diamond mass, 0.2 \(g\)). To get the data use `library(UsingR); data(diamond)`

  • Plotting the fitted regression line and data
library(UsingR)
data(diamond)
# Scatterplot of price against carat, drawn without a surrounding box
with(diamond, plot(
  carat, price,
  pch = 21, cex = 1.1, col = "black", bg = "lightblue",
  xlab = "Mass (carats)",
  ylab = "Price (SIN $)",
  frame.plot = FALSE
))
# Overlay the least squares line
abline(lm(price ~ carat, data = diamond), lwd = 2)


Fitting the linear regression model

# Fit price as a linear function of carat and show the estimated
# intercept and slope.
fit <- lm(price ~ carat, data = diamond)
coef(fit)
## (Intercept)       carat 
##   -259.6259   3721.0249
  • We estimate an expected 3721.02 (SIN) dollar increase in price for every carat increase in mass of diamond.
  • The intercept -259.63 is the expected price of a 0 carat diamond.

Getting a more interpretable intercept

# Center carat at its mean before fitting: the slope matches the uncentered
# fit, while the intercept becomes the fitted price at the mean carat value.
fit2 <- lm(price ~ I(carat - mean(carat)), data = diamond)
coef(fit2)
##            (Intercept) I(carat - mean(carat)) 
##               500.0833              3721.0249

Thus $500.1 is the expected price for the average sized diamond of the data (0.2041667 carats).


Changing scale

  • A one carat increase in a diamond is pretty big, what about changing units to 1/10th of a carat?
  • We can do this by just dividing the coefficient by 10.
  • We expect a 372.102 (SIN) dollar change in price for every 1/10th of a carat increase in mass of diamond.
  • Showing that it’s the same if we rescale the Xs and refit
# Refit with the predictor expressed in tenths of a carat: the intercept is
# unchanged and the slope is one tenth of the per-carat slope.
fit3 <- lm(price ~ I(carat * 10), data = diamond)
coef(fit3)
##   (Intercept) I(carat * 10) 
##     -259.6259      372.1025

Predicting the price of a diamond

# Carat values at which to predict the price
newx <- c(0.16, 0.27, 0.34)
# By hand: intercept + slope * new value
coef(fit)[1] + coef(fit)[2] * newx
## [1]  335.7381  745.0508 1005.5225
# Same predictions via predict(); the newdata column name must match the
# predictor name used in the model formula (carat)
predict(fit, newdata = data.frame(carat = newx))
##         1         2         3 
##  335.7381  745.0508 1005.5225

Predicted values at the observed Xs (red) and at the new Xs (lines)

data(diamond)
with(diamond, plot(
  carat, price,
  pch = 21, cex = 1.1, col = "black", bg = "lightblue",
  xlab = "Mass (carats)",
  ylab = "Price (SIN $)",
  frame.plot = FALSE
))
abline(fit, lwd = 2)
# Fitted values at the observed carat values, in red
points(diamond$carat, predict(fit), pch = 19, col = "red")
# For each new carat value, draw a guide from y = 200 up to the fitted price
# and then across to x = 0.12.
invisible(lapply(c(0.16, 0.27, 0.34), function(x0) {
  yhat0 <- coef(fit)[1] + coef(fit)[2] * x0
  lines(c(x0, x0, 0.12), c(200, yhat0, yhat0))
}))
# Label each guide with its carat value, placed to the left of the line
text(newx, rep(250, 3), labels = newx, pos = 2)


Residuals and residual variation

Residuals

  • Model \(Y_i = \beta_0 + \beta_1 X_i + \epsilon_i\) where \(\epsilon_i \sim N(0, \sigma^2)\).
  • Observed outcome \(i\) is \(Y_i\) at predictor value \(X_i\)
  • Predicted outcome \(i\) is \(\hat Y_i\) at predictor value \(X_i\), where \[ \hat Y_i = \hat \beta_0 + \hat \beta_1 X_i \]
  • Residual, the difference between the observed and predicted outcome \[ e_i = Y_i - \hat Y_i \]
  • The vertical distance between the observed data point and the regression line
  • Least squares minimizes \(\sum_{i=1}^n e_i^2\)
  • The \(e_i\) can be thought of as estimates of the \(\epsilon_i\).

Properties of the residuals

  • \(E[e_i] = 0\).
  • If an intercept is included, \(\sum_{i=1}^n e_i = 0\)
  • If a regressor variable, \(X_i\), is included in the model \(\sum_{i=1}^n e_i X_i = 0\).
  • Residuals are useful for investigating poor model fit.
  • Positive residuals are above the line, negative residuals are below.
  • Residuals can be thought of as the outcome (\(Y\)) with the linear association of the predictor (\(X\)) removed.
  • One differentiates residual variation (variation after removing the predictor) from systematic variation (variation explained by the regression model).
  • Residual plots highlight poor model fit.

library(UsingR)
data(diamond)
# y: price (outcome), x: carat (predictor), n: number of observations
y <- diamond$price; x <- diamond$carat; n <- length(y)
fit <- lm(y ~ x)
# Residuals and fitted values as reported by the model object
e <- resid(fit)
yhat <- predict(fit)
# Largest discrepancy between resid() and observed minus fitted; the value
# shown is at the level of floating point rounding error
max(abs(e -(y - yhat)))
## [1] 9.485746e-13
# Same check with the fitted values written out from the coefficients
max(abs(e - (y - coef(fit)[1] - coef(fit)[2] * x)))
## [1] 9.485746e-13

Residuals are the signed length of the red lines

# Scatterplot of price against carat with the fitted line
plot(diamond$carat, diamond$price,
     xlab = "Mass (carats)",
     ylab = "Price (SIN $)",
     bg = "lightblue",
     col = "black", cex = 1.1, pch = 21,frame = FALSE)
abline(fit, lwd = 2)
# One vertical red segment per observation, from the observed price to the
# fitted value. segments() is vectorized, so no index loop is needed and the
# empty case (where `1 : n` would iterate over 1, 0) cannot go wrong.
segments(x, y, x, yhat, col = "red", lwd = 2)


Residuals versus X

# Residuals plotted against the predictor
plot(diamond$carat, e,
     xlab = "Mass (carats)",
     ylab = "Residuals (SIN $)",
     bg = "lightblue",
     col = "black", cex = 1.1, pch = 21,frame = FALSE)
# Horizontal reference line at zero residual
abline(h = 0, lwd = 2)
# One vertical red segment per observation, from the residual to zero.
# segments() is vectorized, so no index loop is needed and the empty case
# (where `1 : n` would iterate over 1, 0) cannot go wrong.
segments(x, e, x, 0, col = "red", lwd = 2)


Non-linear data

# Simulate 100 points whose mean is x + sin(x), with Gaussian noise
x <- runif(n = 100, min = -3, max = 3)
y <- x + sin(x) + rnorm(n = 100, sd = .2)
# Raw data with the fitted straight line
plot(x, y)
abline(lm(y ~ x))


# Residuals against x, with a reference line at zero
plot(x, resid(lm(y ~ x)))
abline(h = 0)


Heteroskedasticity

# Simulate 100 points around the line y = x; the noise standard deviation
# is proportional to x
x <- runif(n = 100, min = 0, max = 6)
y <- x + rnorm(n = 100, mean = 0, sd = .001 * x)
# Raw data with the fitted straight line
plot(x, y)
abline(lm(y ~ x))


  • Getting rid of the blank space can be helpful

# Residuals against x, with a reference line at zero
plot(x, resid(lm(y ~ x)))
abline(h = 0)


Estimating residual variation

  • Model \(Y_i = \beta_0 + \beta_1 X_i + \epsilon_i\) where \(\epsilon_i \sim N(0, \sigma^2)\).
  • The ML estimate of \(\sigma^2\) is \(\frac{1}{n}\sum_{i=1}^n e_i^2\), the average squared residual.
  • Most people use \[ \hat \sigma^2 = \frac{1}{n-2}\sum_{i=1}^n e_i^2. \]
  • The \(n-2\) instead of \(n\) is so that \(E[\hat \sigma^2] = \sigma^2\)

  • Diamond example
# y: price (outcome), x: carat (predictor), n: number of observations
y <- diamond$price; x <- diamond$carat; n <- length(y)
fit <- lm(y ~ x)
# Residual standard deviation as reported by summary()
summary(fit)$sigma
## [1] 31.84052
# Same quantity by hand: square root of the residual sum of squares
# divided by n - 2
sqrt(sum(resid(fit)^2) / (n - 2))
## [1] 31.84052

Summarizing variation

\[ \begin{align} \sum_{i=1}^n (Y_i - \bar Y)^2 & = \sum_{i=1}^n (Y_i - \hat Y_i + \hat Y_i - \bar Y)^2 \\ & = \sum_{i=1}^n (Y_i - \hat Y_i)^2 + 2 \sum_{i=1}^n (Y_i - \hat Y_i)(\hat Y_i - \bar Y) + \sum_{i=1}^n (\hat Y_i - \bar Y)^2 \\ \end{align} \]


Scratch work

\((Y_i - \hat Y_i) = \{Y_i - (\bar Y - \hat \beta_1 \bar X) - \hat \beta_1 X_i\} = (Y_i - \bar Y) - \hat \beta_1 (X_i - \bar X)\)

\((\hat Y_i - \bar Y) = (\bar Y - \hat \beta_1 \bar X + \hat \beta_1 X_i - \bar Y ) = \hat \beta_1 (X_i - \bar X)\)

\(\sum_{i=1}^n (Y_i - \hat Y_i)(\hat Y_i - \bar Y) = \sum_{i=1}^n \{(Y_i - \bar Y) - \hat \beta_1 (X_i - \bar X)\}\{\hat \beta_1 (X_i - \bar X)\}\)

\(=\hat \beta_1 \sum_{i=1}^n (Y_i - \bar Y)(X_i - \bar X) -\hat\beta_1^2\sum_{i=1}^n (X_i - \bar X)^2\)

\(= \hat \beta_1^2 \sum_{i=1}^n (X_i - \bar X)^2-\hat\beta_1^2\sum_{i=1}^n (X_i - \bar X)^2 = 0\)


Summarizing variation

\[ \sum_{i=1}^n (Y_i - \bar Y)^2 = \sum_{i=1}^n (Y_i - \hat Y_i)^2 + \sum_{i=1}^n (\hat Y_i - \bar Y)^2 \]

Or

Total Variation = Residual Variation + Regression Variation

Define the percent of total variation described by the model as \[ R^2 = \frac{\sum_{i=1}^n (\hat Y_i - \bar Y)^2}{\sum_{i=1}^n (Y_i - \bar Y)^2} = 1 - \frac{\sum_{i=1}^n (Y_i - \hat Y_i)^2}{\sum_{i=1}^n (Y_i - \bar Y)^2} \]


Relation between R-squared \(R^2\) and \(r\) (the correlation)

Recall that \((\hat Y_i - \bar Y) = \hat \beta_1 (X_i - \bar X)\) so that \[ R^2 = \frac{\sum_{i=1}^n (\hat Y_i - \bar Y)^2}{\sum_{i=1}^n (Y_i - \bar Y)^2} = \hat \beta_1^2 \frac{\sum_{i=1}^n(X_i - \bar X)^2}{\sum_{i=1}^n (Y_i - \bar Y)^2} = Cor(Y, X)^2 \] Since, recall, \[ \hat \beta_1 = Cor(Y, X)\frac{Sd(Y)}{Sd(X)} \] So, \(R^2\) is literally \(r\) squared.


Some facts about \(R^2\)

  • \(R^2\) is the percentage of variation explained by the regression model.
  • \(0 \leq R^2 \leq 1\)
  • \(R^2\) is the sample correlation squared.
  • \(R^2\) can be a misleading summary of model fit.
  • Deleting data can inflate \(R^2\).
  • (For later.) Adding terms to a regression model always increases \(R^2\).
  • Do example(anscombe) to see the following data.
  • Basically same mean and variance of X and Y.
  • Identical correlations (hence same \(R^2\) ).
  • Same linear regression relationship.

# Load the anscombe data and run its packaged example.
# NOTE(review): the statements below appear to be that example's own code,
# reproduced for reference - confirm against example(anscombe).
data(anscombe);example(anscombe)

require(stats); require(graphics); data(anscombe)
# Template formula; its response and predictor are replaced for each data set.
ff <- y ~ x
# List with names lm1..lm4 that will hold one fitted model per data set.
mods <- setNames(as.list(1:4), paste0("lm", 1:4))
for(i in 1:4) {
  # Swap both sides of the formula for the i-th pair, e.g. y1 ~ x1.
  ff[2:3] <- lapply(paste0(c("y","x"), i), as.name)
  ## or   ff[[2]] <- as.name(paste0("y", i))
  ##      ff[[3]] <- as.name(paste0("x", i))
  mods[[i]] <- lmi <- lm(ff, data = anscombe)
  #print(anova(lmi))
}
## Now, do what you should have done in the first place: PLOTS
# 2 x 2 panel layout; `op` keeps the previous settings so they can be restored.
op <- par(mfrow = c(2, 2), mar = 0.1+c(4,4,1,1), oma =  c(0, 0, 2, 0))
for(i in 1:4) {
  ff[2:3] <- lapply(paste0(c("y","x"), i), as.name)
  # Same axis limits in every panel; the stored fit is overlaid in blue.
  plot(ff, data = anscombe, col = "red", pch = 21, bg = "orange", cex = 1.2,
       xlim = c(3, 19), ylim = c(3, 13))
  abline(mods[[i]], col = "blue")
}
# Overall title in the outer margin
mtext("Anscombe's 4 Regression data sets", outer = TRUE, cex = 1.5)

# Restore the graphics settings saved in `op`
par(op)

Inference in regression

Recall our model and fitted values

  • Consider the model \[ Y_i = \beta_0 + \beta_1 X_i + \epsilon_i \]
  • \(\epsilon_i \sim N(0, \sigma^2)\).
  • We assume that the true model is known.
  • We assume that you’ve seen confidence intervals and hypothesis tests before.
  • \(\hat \beta_0 = \bar Y - \hat \beta_1 \bar X\)
  • \(\hat \beta_1 = Cor(Y, X) \frac{Sd(Y)}{Sd(X)}\).

  • Statistics like \(\frac{\hat \theta - \theta}{\hat \sigma_{\hat \theta}}\) often have the following properties.
    1. Is normally distributed and has a finite sample Student’s T distribution if the estimated variance is replaced with a sample estimate (under normality assumptions).
    2. Can be used to test \(H_0 : \theta = \theta_0\) versus \(H_a : \theta >, <, \neq \theta_0\).
    3. Can be used to create a confidence interval for \(\theta\) via \(\hat \theta \pm Q_{1-\alpha/2} \hat \sigma_{\hat \theta}\) where \(Q_{1-\alpha/2}\) is the relevant quantile from either a normal or T distribution.
  • In the case of regression with iid sampling assumptions and normal errors, our inferences will follow very similarly to what you saw in your inference class.
  • We won’t cover asymptotics for regression analysis, but suffice it to say that under assumptions on the ways in which the \(X\) values are collected, the iid sampling model, and mean model, the normal results hold to create intervals and confidence intervals

Standard errors (conditioned on X)

\[ \begin{align} Var(\hat \beta_1) & = Var\left(\frac{\sum_{i=1}^n (Y_i - \bar Y) (X_i - \bar X)}{\sum_{i=1}^n (X_i - \bar X)^2}\right) \\ & = \frac{Var\left(\sum_{i=1}^n Y_i (X_i - \bar X) \right) }{\left(\sum_{i=1}^n (X_i - \bar X)^2 \right)^2} \\ & = \frac{\sum_{i=1}^n \sigma^2(X_i - \bar X)^2}{\left(\sum_{i=1}^n (X_i - \bar X)^2 \right)^2} \\ & = \frac{\sigma^2}{\sum_{i=1}^n (X_i - \bar X)^2} \\ \end{align} \]


Results

  • \(\sigma_{\hat \beta_1}^2 = Var(\hat \beta_1) = \sigma^2 / \sum_{i=1}^n (X_i - \bar X)^2\)
  • \(\sigma_{\hat \beta_0}^2 = Var(\hat \beta_0) = \left(\frac{1}{n} + \frac{\bar X^2}{\sum_{i=1}^n (X_i - \bar X)^2 }\right)\sigma^2\)
  • In practice, \(\sigma\) is replaced by its estimate.
  • It’s probably not surprising that under iid Gaussian errors \[ \frac{\hat \beta_j - \beta_j}{\hat \sigma_{\hat \beta_j}} \] follows a \(t\) distribution with \(n-2\) degrees of freedom and a normal distribution for large \(n\).
  • This can be used to create confidence intervals and perform hypothesis tests.

Example diamond data set

library(UsingR)
data(diamond)
# y: price (outcome), x: carat (predictor), n: number of observations
y <- diamond$price
x <- diamond$carat
n <- length(y)
# Least squares slope and intercept from the closed-form expressions
beta1 <- cor(y, x) * sd(y) / sd(x)
beta0 <- mean(y) - beta1 * mean(x)
# Residuals and the residual standard deviation (n - 2 in the denominator)
e <- y - beta0 - beta1 * x
sigma <- sqrt(sum(e^2) / (n-2))
# Sum of squared deviations of x about its mean
ssx <- sum((x - mean(x))^2)
# Standard errors of the intercept and of the slope
seBeta0 <- (1 / n + mean(x) ^ 2 / ssx) ^ .5 * sigma
seBeta1 <- sigma / sqrt(ssx)
# t statistics for a true coefficient of zero, and two-sided p-values from
# the t distribution with n - 2 degrees of freedom
tBeta0 <- beta0 / seBeta0
tBeta1 <- beta1 / seBeta1
pBeta0 <- 2 * pt(abs(tBeta0), df = n - 2, lower.tail = FALSE)
pBeta1 <- 2 * pt(abs(tBeta1), df = n - 2, lower.tail = FALSE)
# One row per coefficient: estimate, standard error, t value, p-value
coefTable <- matrix(
  c(beta0, seBeta0, tBeta0, pBeta0,
    beta1, seBeta1, tBeta1, pBeta1),
  nrow = 2, byrow = TRUE,
  dimnames = list(
    c("(Intercept)", "x"),
    c("Estimate", "Std. Error", "t value", "P(>|t|)")
  )
)

  • Example continued
# Coefficient table computed by hand
coefTable
##              Estimate Std. Error   t value      P(>|t|)
## (Intercept) -259.6259   17.31886 -14.99094 2.523271e-19
## x           3721.0249   81.78588  45.49715 6.751260e-40
# The same table as produced by lm() and summary()
fit <- lm(y ~ x); 
summary(fit)$coefficients
##              Estimate Std. Error   t value     Pr(>|t|)
## (Intercept) -259.6259   17.31886 -14.99094 2.523271e-19
## x           3721.0249   81.78588  45.49715 6.751260e-40

  • Getting a confidence interval
# Coefficient table: column 1 holds the estimates, column 2 the standard errors
sumCoef <- summary(fit)$coefficients
# 95% intervals: estimate -/+ t quantile * standard error. df.residual() is
# the documented accessor; `fit$df` only worked through partial matching of
# the `df.residual` component.
# Intercept
sumCoef[1,1] + c(-1, 1) * qt(.975, df = df.residual(fit)) * sumCoef[1, 2]
## [1] -294.4870 -224.7649
# Slope
sumCoef[2,1] + c(-1, 1) * qt(.975, df = df.residual(fit)) * sumCoef[2, 2]
## [1] 3556.398 3885.651

With 95% confidence, we estimate that a 0.1 carat increase in diamond size results in a 355.6 to 388.6 increase in price in (Singapore) dollars.


Prediction of outcomes

  • Consider predicting \(Y\) at a value of \(X\)
  • Predicting the price of a diamond given the carat
  • Predicting the height of a child given the height of the parents
  • The obvious estimate for prediction at point \(x_0\) is \[ \hat \beta_0 + \hat \beta_1 x_0 \]
  • A standard error is needed to create a prediction interval.
  • There’s a distinction between intervals for the regression line at point \(x_0\) and the prediction of what a \(y\) would be at point \(x_0\).
  • Line at \(x_0\) se, \(\hat \sigma\sqrt{\frac{1}{n} + \frac{(x_0 - \bar X)^2}{\sum_{i=1}^n (X_i - \bar X)^2}}\)
  • Prediction interval se at \(x_0\), \(\hat \sigma\sqrt{1 + \frac{1}{n} + \frac{(x_0 - \bar X)^2}{\sum_{i=1}^n (X_i - \bar X)^2}}\)

Plotting the prediction intervals

plot(
  x, y,
  pch = 21, col = "black", bg = "lightblue", cex = 1.5,
  xlab = "Carat", ylab = "Dollars",
  frame.plot = FALSE
)
abline(fit, lwd = 2)
# Grid of carat values across the observed range, and the fitted line on it
xVals <- seq(from = min(x), to = max(x), by = .01)
yVals <- beta0 + beta1 * xVals
# se1: standard error of the fitted line at each grid value
# se2: standard error for predicting a new outcome (note the extra "1 +")
se1 <- sigma * sqrt(1 / n + (xVals - mean(x))^2/ssx)
se2 <- sigma * sqrt(1 + 1 / n + (xVals - mean(x))^2/ssx)
# Band of plus and minus two standard errors for the line
lines(xVals, yVals + 2 * se1)
lines(xVals, yVals - 2 * se1)
# Band of plus and minus two standard errors for a new prediction
lines(xVals, yVals + 2 * se2)
lines(xVals, yVals - 2 * se2)


Discussion

  • Both intervals have varying widths.
  • Least width at the mean of the Xs.
  • We are quite confident in the regression line, so that interval is very narrow.
  • If we knew \(\beta_0\) and \(\beta_1\) this interval would have zero width.
  • The prediction interval must incorporate the variability in the data around the line.
  • Even if we knew \(\beta_0\) and \(\beta_1\) this interval would still have width.

in R

# Grid of predictor values; the column is named x to match the model formula
newdata <- data.frame(x = xVals)
# Interval for the regression line and interval for a new observation
p1 <- predict(fit, newdata, interval = "confidence")
p2 <- predict(fit, newdata, interval = "prediction")
plot(
  x, y,
  pch = 21, col = "black", bg = "lightblue", cex = 1.5,
  xlab = "Carat", ylab = "Dollars",
  frame.plot = FALSE
)
abline(fit, lwd = 2)
# Lower and upper limits of the confidence interval
lines(xVals, p1[, "lwr"])
lines(xVals, p1[, "upr"])
# Lower and upper limits of the prediction interval
lines(xVals, p2[, "lwr"])
lines(xVals, p2[, "upr"])


Multivariable regression

Multivariable regression analyses

  • If I were to present evidence of a relationship between breath mint usage (mints per day, X) and pulmonary function (measured in FEV), you would be skeptical.
  • Likely, you would say, ‘smokers tend to use more breath mints than non smokers, smoking is related to a loss in pulmonary function. That’s probably the culprit.’
  • If asked what would convince you, you would likely say, ‘If non-smoking breath mint users had lower lung function than non-smoking non-breath mint users and, similarly, if smoking breath mint users had lower lung function than smoking non-breath mint users, I’d be more inclined to believe you’.
  • In other words, to even consider my results, I would have to demonstrate that they hold while holding smoking status fixed.

  • An insurance company is interested in how last year’s claims can predict a person’s time in the hospital this year.
  • They want to use an enormous amount of data contained in claims to predict a single number. Simple linear regression is not equipped to handle more than one predictor.
  • How can one generalize SLR to incorporate lots of regressors for the purpose of prediction?
  • What are the consequences of adding lots of regressors?
  • Surely there must be consequences to throwing variables in that aren’t related to Y?
  • Surely there must be consequences to omitting variables that are?

The linear model

  • The general linear model extends simple linear regression (SLR) by adding terms linearly into the model. \[ Y_i = \beta_1 X_{1i} + \beta_2 X_{2i} + \ldots + \beta_{p} X_{pi} + \epsilon_{i} = \sum_{k=1}^p X_{ki} \beta_k + \epsilon_{i} \]
  • Here \(X_{1i}=1\) typically, so that an intercept is included.
  • Least squares (and hence ML estimates under iid Gaussianity of the errors) minimizes \[ \sum_{i=1}^n \left(Y_i - \sum_{k=1}^p X_{ki} \beta_k\right)^2 \]
  • Note, the important linearity is linearity in the coefficients. Thus \[ Y_i = \beta_1 X_{1i}^2 + \beta_2 X_{2i}^2 + \ldots + \beta_{p} X_{pi}^2 + \epsilon_{i} \] is still a linear model. (We’ve just squared the elements of the predictor variables.)

How to get estimates

  • The real way requires linear algebra. We’ll go over an intuitive development instead.
  • Recall that the LS estimate for regression through the origin, \(E[Y_i]=X_{1i}\beta_1\), was \(\sum X_i Y_i / \sum X_i^2\).
  • Let’s consider two regressors, \(E[Y_i] = X_{1i}\beta_1 + X_{2i}\beta_2 = \mu_i\).
  • Also, recall that if \(\hat \mu_i\) satisfies \[ \sum_{i=1}^n (Y_i - \hat \mu_i) (\hat \mu_i - \mu_i) = 0 \] for all possible values of \(\mu_i\), then we’ve found the LS estimates.

\[ \sum_{i=1}^n (Y_i - \hat \mu_i) (\hat \mu_i - \mu_i) = \sum_{i=1}^n (Y_i - \hat \beta_1 X_{1i} - \hat \beta_2 X_{2i}) \left\{X_{1i}(\hat \beta_1 - \beta_1) + X_{2i}(\hat \beta_2 - \beta_2) \right\} \]

  • Thus we need
  1. \(\sum_{i=1}^n (Y_i - \hat \beta_1 X_{1i} - \hat \beta_2 X_{2i}) X_{1i} = 0\)
  2. \(\sum_{i=1}^n (Y_i - \hat \beta_1 X_{1i} - \hat \beta_2 X_{2i}) X_{2i} = 0\)
  • Hold \(\hat \beta_1\) fixed in 2. and solve and we get that \[ \hat \beta_2 = \frac{\sum_{i=1}^n (Y_i - X_{1i}\hat \beta_1)X_{2i}}{\sum_{i=1}^n X_{2i}^2} \]
  • Plugging this into 1. we get that \[ 0 = \sum_{i=1}^n \left\{Y_i - \frac{\sum_j X_{2j}Y_j}{\sum_j X_{2j}^2}X_{2i} - \hat \beta_1 \left(X_{1i} - \frac{\sum_j X_{2j}X_{1j}}{\sum_j X_{2j}^2} X_{2i}\right)\right\} X_{1i} \]

  • Rewriting this we get \[ 0 = \sum_{i=1}^n \left\{ e_{i, Y | X_2} - \hat \beta_1 e_{i, X_1 | X_2} \right\} X_{1i} \] where \(e_{i, a | b} = a_i - \frac{\sum_{j=1}^n a_j b_j }{\sum_{j=1}^n b_j^2} b_i\) is the residual when regressing \(b\) out of \(a\) without an intercept.
  • We get the solution \[ \hat \beta_1 = \frac{\sum_{i=1}^n e_{i, Y | X_2} e_{i, X_1 | X_2}}{\sum_{i=1}^n e_{i, X_1 | X_2} X_{1i}} \]

  • But note that \[ \sum_{i=1}^n e_{i, X_1 | X_2}^2 = \sum_{i=1}^n e_{i, X_1 | X_2} \left(X_{1i} - \frac{\sum_j X_{2j}X_{1j}}{\sum_j X_{2j}^2} X_{2i}\right) \] \[ = \sum_{i=1}^n e_{i, X_1 | X_2} X_{1i} - \frac{\sum_j X_{2j}X_{1j}}{\sum_j X_{2j}^2} \sum_{i=1}^n e_{i, X_1 | X_2} X_{2i} \] But \(\sum_{i=1}^n e_{i, X_1 | X_2} X_{2i} = 0\). So we get that \[ \sum_{i=1}^n e_{i, X_1 | X_2}^2 = \sum_{i=1}^n e_{i, X_1 | X_2} X_{1i} \] Thus we get that \[ \hat \beta_1 = \frac{\sum_{i=1}^n e_{i, Y | X_2} e_{i, X_1 | X_2}}{\sum_{i=1}^n e_{i, X_1 | X_2}^2} \]

Summing up fitting with two regressors

\[\hat \beta_1 = \frac{\sum_{i=1}^n e_{i, Y | X_2} e_{i, X_1 | X_2}}{\sum_{i=1}^n e_{i, X_1 | X_2}^2}\]

  • That is, the regression estimate for \(\beta_1\) is the regression through the origin estimate having regressed \(X_2\) out of both the response and the predictor.
  • (Similarly, the regression estimate for \(\beta_2\) is the regression through the origin estimate having regressed \(X_1\) out of both the response and the predictor.)
  • More generally, multivariate regression estimates are exactly those having removed the linear relationship of the other variables from both the regressor and response.


Example with two variables, simple linear regression

  • \(Y_{i} = \beta_1 X_{1i} + \beta_2 X_{2i}\) where \(X_{2i} = 1\) is an intercept term.
  • Then \(\frac{\sum_j X_{2j}X_{1j}}{\sum_j X_{2j}^2}X_{2i} = \frac{\sum_j X_{1j}}{n} = \bar X_1\).
  • \(e_{i, X_1 | X_2} = X_{1i} - \bar X_1\).
  • Similarly \(e_{i, Y | X_2} = Y_i - \bar Y\).
  • Thus \[ \hat \beta_1 = \frac{\sum_{i=1}^n e_{i, Y | X_2} e_{i, X_1 | X_2}}{\sum_{i=1}^n e_{i, X_1 | X_2}^2} = \frac{\sum_{i=1}^n (X_i - \bar X)(Y_i - \bar Y)}{\sum_{i=1}^n (X_i - \bar X)^2} = Cor(X, Y) \frac{Sd(Y)}{Sd(X)} \]

The general case

  • The equations \[ \sum_{i=1}^n (Y_i - X_{i1}\hat \beta_1 - \ldots - X_{ip}\hat \beta_p) X_{ik} = 0 \] for \(k = 1, \ldots, p\) yield \(p\) equations with \(p\) unknowns.
  • Solving them yields the least squares estimates. (With obtaining a good, fast, general solution requiring some knowledge of linear algebra.)
  • The least squares estimate for the coefficient of a multivariate regression model is exactly regression through the origin with the linear relationships with the other regressors removed from both the regressor and outcome by taking residuals.
  • In this sense, multivariate regression “adjusts” a coefficient for the linear impact of the other variables.

Fitting LS equations

Just so I don’t leave you hanging, let’s show a way to get estimates. Recall the equations: \[ \sum_{i=1}^n (Y_i - X_{i1}\hat \beta_1 - \ldots - X_{ip}\hat \beta_p) X_{ik} = 0 \] If I hold \(\hat \beta_1, \ldots, \hat \beta_{p-1}\) fixed then we get that \[ \hat \beta_p = \frac{\sum_{i=1}^n (Y_i - X_{i1}\hat \beta_1 - \ldots - X_{i,p-1}\hat \beta_{p-1}) X_{ip} }{\sum_{i=1}^n X_{ip}^2} \] Plugging this back into the equations, we wind up with \[ \sum_{i=1}^n (e_{i,Y|X_p} - e_{i, X_{1} | X_p} \hat \beta_1 - \ldots - e_{i, X_{p-1} | X_{p}} \hat \beta_{p-1}) X_{ik} = 0 \]


We can tidy it up a bit more, though

Note that \[ X_{ik} = e_{i,X_k|X_p} + \frac{\sum_{j=1}^n X_{jk} X_{jp}}{\sum_{j=1}^n X_{jp}^2} X_{ip} \] and \(\sum_{i=1}^n e_{i,X_j | X_p} X_{ip} = 0\). Thus \[ \sum_{i=1}^n (e_{i,Y|X_p} - e_{i, X_{1} | X_p} \hat \beta_1 - \ldots - e_{i, X_{p-1} | X_{p}} \hat \beta_{p-1}) X_{ik} = 0 \] is equal to \[ \sum_{i=1}^n (e_{i,Y|X_p} - e_{i, X_{1} | X_p} \hat \beta_1 - \ldots - e_{i, X_{p-1} | X_{p}} \hat \beta_{p-1}) e_{i,X_k|X_p} = 0 \]


To sum up

  • We’ve reduced \(p\) LS equations and \(p\) unknowns to \(p-1\) LS equations and \(p-1\) unknowns.
  • Every variable has been replaced by its residual with \(X_p\).
  • This process can then be iterated until only Y and one variable remains.
  • Think of it as follows. If we want an adjusted relationship between y and x, keep taking residuals over confounders and do regression through the origin.
  • The order that you do the confounders doesn’t matter.
  • (It can’t because our choice of doing \(p\) first was arbitrary.)
  • This isn’t a terribly efficient way to get estimates. But, it’s nice conceptually, as it shows how regression estimates are adjusted for the linear relationship with other variables.

Demonstration that it works using an example

  • Linear model with three regressors and no intercept
# Simulate three standard-normal regressors and a response with unit
# coefficients on each of them plus a small amount of noise.
n <- 100
x <- rnorm(n)
x2 <- rnorm(n)
x3 <- rnorm(n)
y <- x + x2 + x3 + rnorm(n, sd = 0.1)
# e(a, b): residual of a after regression through the origin on b.
e <- function(a, b) {
  a - sum(a * b) / sum(b^2) * b
}
# Take x2 out of y, x and x3, then take the x2-adjusted x3 out of y and x.
ey <- e(e(y, x2), e(x3, x2))
ex <- e(e(x, x2), e(x3, x2))
# Regression through the origin of residualized y on residualized x.
sum(ey * ex) / sum(ex^2)
## [1] 1.012232
coef(lm(y ~ x + x2 + x3 - 1)) # the -1 removes the intercept term
##         x        x2        x3 
## 1.0122321 0.9904511 0.9893869

  • Showing that order doesn’t matter
# Same computation with the roles of x2 and x3 swapped: take x3 out first,
# then the x3-adjusted x2. Relies on e(), x, x2, x3 and y defined just above
# in these notes.
ey <- e(e(y, x3), e(x2, x3))
ex <- e(e(x, x3), e(x2, x3))
sum(ey * ex) / sum(ex^2)
## [1] 1.012232
coef(lm(y ~ x + x2 + x3 - 1)) # the -1 removes the intercept term
##         x        x2        x3 
## 1.0122321 0.9904511 0.9893869

  • Residuals again
# Let lm() do the residualizing: remove x2 and x3 jointly (no intercept) from
# both y and x, then regress through the origin. Relies on x, x2, x3 and y
# from the simulation earlier in these notes.
ey <- residuals(lm(y ~ x2 + x3 - 1))
ex <- residuals(lm(x ~ x2 + x3 - 1))
sum(ey * ex) / sum(ex^2)
## [1] 1.012232
coef(lm(y ~ x + x2 + x3 - 1)) # the -1 removes the intercept term
##         x        x2        x3 
## 1.0122321 0.9904511 0.9893869

Interpretation of the coefficient

\[E[Y | X_1 = x_1, \ldots, X_p = x_p] = \sum_{k=1}^p x_{k} \beta_k\] So that \[ E[Y | X_1 = x_1 + 1, \ldots, X_p = x_p] - E[Y | X_1 = x_1, \ldots, X_p = x_p]\] \[= (x_1 + 1) \beta_1 + \sum_{k=2}^p x_{k} \beta_k - \sum_{k=1}^p x_{k} \beta_k = \beta_1 \] So that the interpretation of a multivariate regression coefficient is the expected change in the response per unit change in the regressor, holding all of the other regressors fixed.

In the next lecture, we’ll do examples and go over context-specific interpretations.


Fitted values, residuals and residual variation

All of our SLR quantities can be extended to linear models

  • Model \(Y_i = \sum_{k=1}^p X_{ik} \beta_{k} + \epsilon_{i}\) where \(\epsilon_i \sim N(0, \sigma^2)\)
  • Fitted responses \(\hat Y_i = \sum_{k=1}^p X_{ik} \hat \beta_{k}\)
  • Residuals \(e_i = Y_i - \hat Y_i\)
  • Variance estimate \(\hat \sigma^2 = \frac{1}{n-p} \sum_{i=1}^n e_i ^2\)
  • To get predicted responses at new values, \(x_1, \ldots, x_p\), simply plug them into the linear model \(\sum_{k=1}^p x_{k} \hat \beta_{k}\)
  • Coefficients have standard errors, \(\hat \sigma_{\hat \beta_k}\), and \(\frac{\hat \beta_k - \beta_k}{\hat \sigma_{\hat \beta_k}}\) follows a \(T\) distribution with \(n-p\) degrees of freedom.
  • Predicted responses have standard errors and we can calculate predicted and expected response intervals.


Linear models

  • Linear models are the single most important applied statistical and machine learning technique, by far.
  • Some amazing things that you can accomplish with linear models
  • Decompose a signal into its harmonics.
  • Flexibly fit complicated functions.
  • Fit factor variables as predictors.
  • Uncover complex multivariate relationships with the response.
  • Build accurate prediction models.

Multivariable regression examples

Scatterplot Matrices – Swiss fertility data

# Load the Swiss fertility data and draw a scatterplot matrix of all of its
# variables with a smoother in every panel.
# library() replaces require(): require() only returns FALSE when a package is
# missing, so a failure would go unnoticed until a later line breaks.
library(datasets)
library(stats)
library(graphics)
data(swiss)
# Colour index 3 where Catholic <= 50, colour index 4 where Catholic > 50.
pairs(swiss, panel = panel.smooth, main = "Swiss data",
      col = 3 + (swiss$Catholic > 50))


?swiss

  • Description Standardized fertility measure and socio-economic indicators for each of 47 French-speaking provinces of Switzerland at about 1888.

A data frame with 47 observations on 6 variables, each of which is in percent, i.e., in [0, 100].

  • [,1] Fertility Ig, ‘common standardized fertility measure’
  • [,2] Agriculture % of males involved in agriculture as occupation
  • [,3] Examination % draftees receiving highest mark on army examination
  • [,4] Education % education beyond primary school for draftees.
  • [,5] Catholic % ‘catholic’ (as opposed to ‘protestant’).
  • [,6] Infant.Mortality live births who live less than 1 year.

All variables but ‘Fertility’ give proportions of the population.


Calling lm

# Coefficient table (estimate, standard error, t value, p-value) for Fertility
# regressed on every other column of swiss.
coef(summary(lm(Fertility ~ ., data = swiss)))
##                    Estimate  Std. Error   t value     Pr(>|t|)
## (Intercept)      66.9151817 10.70603759  6.250229 1.906051e-07
## Agriculture      -0.1721140  0.07030392 -2.448142 1.872715e-02
## Examination      -0.2580082  0.25387820 -1.016268 3.154617e-01
## Education        -0.8709401  0.18302860 -4.758492 2.430605e-05
## Catholic          0.1041153  0.03525785  2.952969 5.190079e-03
## Infant.Mortality  1.0770481  0.38171965  2.821568 7.335715e-03

Example interpretation

  • Agriculture is expressed in percentages (0 - 100)
  • Estimate is -0.1721.
  • We estimate an expected 0.17 decrease in standardized fertility for every 1% increase in percentage of males involved in agriculture, holding the remaining variables constant.
  • The t-test for \(H_0: \beta_{Agri} = 0\) versus \(H_a: \beta_{Agri} \neq 0\) is significant.
  • Interestingly, the unadjusted estimate is
# Coefficient table for Fertility regressed on Agriculture alone (unadjusted).
coef(summary(lm(Fertility ~ Agriculture, data = swiss)))
##               Estimate Std. Error   t value     Pr(>|t|)
## (Intercept) 60.3043752 4.25125562 14.185074 3.216304e-18
## Agriculture  0.1942017 0.07671176  2.531577 1.491720e-02

How can adjustment reverse the sign of an effect? Let’s try a simulation.

# Simulate a confounded setting: x1 is 0.01 * x2 plus small uniform noise, and
# y decreases with x1 but increases with x2.
n <- 100
x2 <- 1:n
x1 <- 0.01 * x2 + runif(n, -0.1, 0.1)
y <- -x1 + x2 + rnorm(n, sd = 0.01)
# Unadjusted fit: x1 is the only regressor.
# `$coefficients` is spelled out; `$coef` only works via partial matching.
summary(lm(y ~ x1))$coefficients
##               Estimate Std. Error    t value     Pr(>|t|)
## (Intercept) -0.1777562   1.188008 -0.1496255 8.813676e-01
## x1          98.1976142   2.030437 48.3628062 3.376349e-70
# Adjusted fit: x2 is included alongside x1.
summary(lm(y ~ x1 + x2))$coefficients
##                 Estimate   Std. Error     t value      Pr(>|t|)
## (Intercept)  0.003860563 0.0020226709    1.908646  5.926409e-02
## x1          -1.009777858 0.0174069110  -58.010169  4.106314e-77
## x2           1.000050771 0.0001719744 5815.113110 1.269586e-270

# Left panel: y against x1, points coloured by x2, with the unadjusted fit.
# Right panel: residuals of y on x2 against residuals of x1 on x2, with a line
# through the origin whose slope is the x1 coefficient of the adjusted model.
# Relies on n, x1, x2 and y from the simulation above in these notes.
# par() returns the previous settings; they are restored at the end so later
# plots in the same session are not forced into the two-panel layout.
oldPar <- par(mfrow = c(1, 2))
plot(x1, y, pch = 21, col = "black", bg = topo.colors(n)[x2],
     frame = FALSE, cex = 1.5)
title("Unadjusted, color is X2")
abline(lm(y ~ x1), lwd = 2)
plot(resid(lm(x1 ~ x2)), resid(lm(y ~ x2)),
     pch = 21, col = "black", bg = "lightblue", frame = FALSE, cex = 1.5)
title("Adjusted")
abline(0, coef(lm(y ~ x1 + x2))[2], lwd = 2)
par(oldPar)


Back to this data set

  • The sign reverses itself with the inclusion of Examination and Education, both of which are negatively correlated with Agriculture.
  • The percent of males in the province working in agriculture is negatively related to educational attainment (correlation of -0.6395225) and Education and Examination (correlation of 0.6984153) are obviously measuring similar things.
  • Is the positive marginal an artifact for not having accounted for, say, Education level? (Education does have a stronger effect, by the way.)
  • At the minimum, anyone claiming that provinces that are more agricultural have higher fertility rates would immediately be open to criticism.

What if we include an unnecessary variable?

z adds no new linear information, since it’s a linear combination of variables already included. R just drops terms that are linear combinations of other terms.

# z is the sum of two columns that are already regressors in the model below.
z <- with(swiss, Agriculture + Education)
lm(Fertility ~ . + z, data = swiss)
## 
## Call:
## lm(formula = Fertility ~ . + z, data = swiss)
## 
## Coefficients:
##      (Intercept)       Agriculture       Examination         Education  
##          66.9152           -0.1721           -0.2580           -0.8709  
##         Catholic  Infant.Mortality                 z  
##           0.1041            1.0770                NA

Dummy variables are smart / Indicator Variable / categories / levels

  • Consider the linear model \[ Y_i = \beta_0 + X_{i1} \beta_1 + \epsilon_{i} \] where each \(X_{i1}\) is binary so that it is a 1 if measurement \(i\) is in a group and 0 otherwise. (Treated versus not in a clinical trial, for example.)
  • Then for people in the group \(E[Y_i] = \beta_0 + \beta_1\)
  • And for people not in the group \(E[Y_i] = \beta_0\)
  • The LS fits work out to be \(\hat \beta_0 + \hat \beta_1\) is the mean for those in the group and \(\hat \beta_0\) is the mean for those not in the group.
  • \(\beta_1\) is interpreted as the increase or decrease in the mean comparing those in the group to those not.
  • Note including a binary variable that is 1 for those not in the group would be redundant. It would create three parameters to describe two means.

More than 2 levels

  • Consider a multilevel factor level. For didactic reasons, let’s say a three level factor (example, US political party affiliation: Republican, Democrat, Independent)
  • \(Y_i = \beta_0 + X_{i1} \beta_1 + X_{i2} \beta_2 + \epsilon_i\).
  • \(X_{i1}\) is 1 for Republicans and 0 otherwise.
  • \(X_{i2}\) is 1 for Democrats and 0 otherwise.
  • If \(i\) is Republican \(E[Y_i] = \beta_0 +\beta_1\)
  • If \(i\) is Democrat \(E[Y_i] = \beta_0 + \beta_2\).
  • If \(i\) is Independent \(E[Y_i] = \beta_0\).
  • \(\beta_1\) compares Republicans to Independents.
  • \(\beta_2\) compares Democrats to Independents.
  • \(\beta_1 - \beta_2\) compares Republicans to Democrats.
  • (Choice of reference category changes the interpretation.)

Insect Sprays

# Load the InsectSprays data and draw one box per spray type.
# library() replaces require(): require() only returns FALSE when a package is
# missing, so a failure would go unnoticed until a later line breaks.
library(datasets)
library(stats)
library(graphics)
data(InsectSprays)
# varwidth = TRUE is passed through to boxplot() unchanged from the notes.
boxplot(count ~ spray, data = InsectSprays,
        xlab = "Type of spray", ylab = "Insect count",
        main = "InsectSprays data", varwidth = TRUE, col = "lightgray")


Linear model fit, group A is the reference

# Coefficient table for count regressed on the spray factor.
# `$coefficients` is spelled out; `$coef` only works via partial matching.
summary(lm(count ~ spray, data = InsectSprays))$coefficients
##                Estimate Std. Error    t value     Pr(>|t|)
## (Intercept)  14.5000000   1.132156 12.8074279 1.470512e-19
## sprayB        0.8333333   1.601110  0.5204724 6.044761e-01
## sprayC      -12.4166667   1.601110 -7.7550382 7.266893e-11
## sprayD       -9.5833333   1.601110 -5.9854322 9.816910e-08
## sprayE      -11.0000000   1.601110 -6.8702352 2.753922e-09
## sprayF        2.1666667   1.601110  1.3532281 1.805998e-01

Hard coding the dummy variables

# Same model with the indicator columns for sprays B-F written out by hand;
# spray A has no indicator of its own.
# `$coefficients` is spelled out; `$coef` only works via partial matching.
summary(lm(count ~
             I(1 * (spray == 'B')) + I(1 * (spray == 'C')) +
             I(1 * (spray == 'D')) + I(1 * (spray == 'E')) +
             I(1 * (spray == 'F')),
           data = InsectSprays))$coefficients
##                          Estimate Std. Error    t value     Pr(>|t|)
## (Intercept)            14.5000000   1.132156 12.8074279 1.470512e-19
## I(1 * (spray == "B"))   0.8333333   1.601110  0.5204724 6.044761e-01
## I(1 * (spray == "C")) -12.4166667   1.601110 -7.7550382 7.266893e-11
## I(1 * (spray == "D"))  -9.5833333   1.601110 -5.9854322 9.816910e-08
## I(1 * (spray == "E")) -11.0000000   1.601110 -6.8702352 2.753922e-09
## I(1 * (spray == "F"))   2.1666667   1.601110  1.3532281 1.805998e-01

What if we include all 6?

# Intercept plus an indicator for every one of the six sprays. The indicator
# for A is listed last, exactly as in the original call.
lm(
  count ~
    I(1 * (spray == "B")) +
    I(1 * (spray == "C")) +
    I(1 * (spray == "D")) +
    I(1 * (spray == "E")) +
    I(1 * (spray == "F")) +
    I(1 * (spray == "A")),
  data = InsectSprays
)
## 
## Call:
## lm(formula = count ~ I(1 * (spray == "B")) + I(1 * (spray == 
##     "C")) + I(1 * (spray == "D")) + I(1 * (spray == "E")) + I(1 * 
##     (spray == "F")) + I(1 * (spray == "A")), data = InsectSprays)
## 
## Coefficients:
##           (Intercept)  I(1 * (spray == "B"))  I(1 * (spray == "C"))  
##               14.5000                 0.8333               -12.4167  
## I(1 * (spray == "D"))  I(1 * (spray == "E"))  I(1 * (spray == "F"))  
##               -9.5833               -11.0000                 2.1667  
## I(1 * (spray == "A"))  
##                    NA

What if we omit the intercept? — remove the intercept

# Without an intercept every spray level gets its own coefficient.
# `$coefficients` is spelled out; `$coef` only works via partial matching.
summary(lm(count ~ spray - 1, data = InsectSprays))$coefficients
##         Estimate Std. Error   t value     Pr(>|t|)
## sprayA 14.500000   1.132156 12.807428 1.470512e-19
## sprayB 15.333333   1.132156 13.543487 1.001994e-20
## sprayC  2.083333   1.132156  1.840148 7.024334e-02
## sprayD  4.916667   1.132156  4.342749 4.953047e-05
## sprayE  3.500000   1.132156  3.091448 2.916794e-03
## sprayF 16.666667   1.132156 14.721181 1.573471e-22
# Per-spray averages of count, for comparison with the estimates above.
unique(ave(InsectSprays$count, InsectSprays$spray))
## [1] 14.500000 15.333333  2.083333  4.916667  3.500000 16.666667

Summary

  • If we treat Spray as a factor, R includes an intercept and omits the alphabetically first level of the factor.
  • All t-tests are for comparisons of Sprays versus Spray A.
  • Empirical mean for A is the intercept.
  • Other group means are the intercept plus their coefficient.
  • If we omit an intercept, then it includes terms for all levels of the factor.
  • Group means are the coefficients.
  • Tests are tests of whether the groups are different than zero. (Are the expected counts zero for that spray.)
  • If we want comparisons between, say, Spray B and C, we could refit the model with C (or B) as the reference level.

Reordering the levels

# Make C the first (reference) level so each coefficient compares a spray to C.
spray2 <- relevel(InsectSprays$spray, "C")
# `$coefficients` is spelled out; `$coef` only works via partial matching.
summary(lm(count ~ spray2, data = InsectSprays))$coefficients
##              Estimate Std. Error  t value     Pr(>|t|)
## (Intercept)  2.083333   1.132156 1.840148 7.024334e-02
## spray2A     12.416667   1.601110 7.755038 7.266893e-11
## spray2B     13.250000   1.601110 8.275511 8.509776e-12
## spray2D      2.833333   1.601110 1.769606 8.141205e-02
## spray2E      1.416667   1.601110 0.884803 3.794750e-01
## spray2F     14.583333   1.601110 9.108266 2.794343e-13

Doing it manually

Equivalently \[Var(\hat \beta_B - \hat \beta_C) = Var(\hat \beta_B) + Var(\hat \beta_C) - 2 Cov(\hat \beta_B, \hat \beta_C)\]

# Manual test of spray B versus spray C from the model with A as reference.
fit <- lm(count ~ spray, data = InsectSprays) # A is ref
bbmbc <- coef(fit)[2] - coef(fit)[3] # B - C
temp <- summary(fit)
# Var(B - C) = Var(B) + Var(C) - 2 Cov(B, C); cov.unscaled holds these terms
# before multiplication by the residual variance.
se <- temp$sigma * sqrt(temp$cov.unscaled[2, 2] + temp$cov.unscaled[3, 3] -
                          2 * temp$cov.unscaled[2, 3])
t <- bbmbc / se
# Two-sided p-value, so it agrees with the Pr(>|t|) column of summary();
# pt(-abs(t)) alone is only one tail. `df.residual` is spelled out because
# `fit$df` only works via partial matching.
p <- 2 * pt(-abs(t), df = fit$df.residual)
out <- c(bbmbc, se, t, p)
names(out) <- c("B - C", "SE", "T", "P")
round(out, 3)
##  B - C     SE      T      P 
## 13.250  1.601  8.276  0.000

Other thoughts on this data

  • Counts are bounded from below by 0, violates the assumption of normality of the errors.
  • Also there are counts near zero, so both the actual assumption and the intent of the assumption are violated.
  • Variance does not appear to be constant.
  • Perhaps taking logs of the counts would help.
  • There are 0 counts, so maybe log(Count + 1)
  • Also, we’ll cover Poisson GLMs for fitting count data.

Example - Millennium Development Goal 1

http://www.un.org/millenniumgoals/pdf/MDG_FS_1_EN.pdf

http://apps.who.int/gho/athena/data/GHO/WHOSIS_000008.csv?profile=text&filter=COUNTRY:*;SEX:*


WHO childhood hunger data

# The commented call below would fetch the WHO export; the notes read a local
# copy named hunger.csv instead.
#download.file("http://apps.who.int/gho/athena/data/GHO/WHOSIS_000008.csv?profile=text&filter=COUNTRY:*;SEX:*","hunger.csv",method="curl")
hunger <- read.csv("hunger.csv")
# Drop the rows whose Sex is "Both sexes".
hunger <- hunger[hunger$Sex != "Both sexes", ]
head(hunger)
##                                Indicator Data.Source PUBLISH.STATES Year
## 1 Children aged <5 years underweight (%) NLIS_310044      Published 1986
## 2 Children aged <5 years underweight (%) NLIS_310233      Published 1990
## 3 Children aged <5 years underweight (%) NLIS_312902      Published 2005
## 5 Children aged <5 years underweight (%) NLIS_312522      Published 2002
## 6 Children aged <5 years underweight (%) NLIS_312955      Published 2008
## 8 Children aged <5 years underweight (%) NLIS_312963      Published 2008
##              WHO.region       Country    Sex Display.Value Numeric Low
## 1                Africa       Senegal   Male          19.3    19.3  NA
## 2              Americas      Paraguay   Male           2.2     2.2  NA
## 3              Americas     Nicaragua   Male           5.3     5.3  NA
## 5 Eastern Mediterranean        Jordan Female           3.2     3.2  NA
## 6                Africa Guinea-Bissau Female          17.0    17.0  NA
## 8                Africa         Ghana   Male          15.7    15.7  NA
##   High Comments
## 1   NA       NA
## 2   NA       NA
## 3   NA       NA
## 5   NA       NA
## 6   NA       NA
## 8   NA       NA

Plot percent hungry versus time

# Fit percent underweight (Numeric) on Year, then plot the raw points.
# Relies on the hunger data frame loaded earlier in these notes.
lm1 <- lm(hunger$Numeric ~ hunger$Year)
plot(hunger$Year, hunger$Numeric, pch = 19, col = "blue")


Remember the linear model

\[Hu_i = b_0 + b_1 Y_i + e_i\]

\(b_0\) = percent hungry at Year 0

\(b_1\) = decrease in percent hungry per year

\(e_i\) = everything we didn’t measure


Add the linear model

# Scatterplot of percent underweight against year with the fitted line added.
# Relies on the hunger data frame loaded earlier in these notes.
lm1 <- lm(hunger$Numeric ~ hunger$Year)
plot(hunger$Year, hunger$Numeric, pch = 19, col = "blue")
# fitted() replaces `lm1$fitted`, which only works via partial matching of
# the `fitted.values` component.
lines(hunger$Year, fitted(lm1), lwd = 3, col = "darkgrey")


Color by male/female

# Draw the scatter, then overplot the same points coloured by sex: colour
# index 2 where Sex is "Male", colour index 1 otherwise.
# Relies on the hunger data frame loaded earlier in these notes.
plot(hunger$Year, hunger$Numeric, pch = 19)
points(hunger$Year, hunger$Numeric, pch = 19,
       col = ifelse(hunger$Sex == "Male", 2, 1))


Now two lines

\[HuF_i = bf_0 + bf_1 YF_i + ef_i\]

\(bf_0\) = percent of girls hungry at Year 0

\(bf_1\) = decrease in percent of girls hungry per year

\(ef_i\) = everything we didn’t measure

\[HuM_i = bm_0 + bm_1 YM_i + em_i\]

\(bm_0\) = percent of boys hungry at Year 0

\(bm_1\) = decrease in percent of boys hungry per year

\(em_i\) = everything we didn’t measure


Color by male/female

# Separate fits of percent underweight on year for each sex, drawn over the
# sex-coloured scatter. Relies on the hunger data frame loaded earlier.
lmM <- lm(hunger$Numeric[hunger$Sex=="Male"] ~ hunger$Year[hunger$Sex=="Male"])
lmF <- lm(hunger$Numeric[hunger$Sex=="Female"] ~ hunger$Year[hunger$Sex=="Female"])
plot(hunger$Year, hunger$Numeric, pch = 19)
# Points: colour index 2 for "Male", colour index 1 otherwise.
points(hunger$Year, hunger$Numeric, pch = 19, col = ((hunger$Sex == "Male") * 1 + 1))
# Line colours now follow the point colours (Male = red, Female = black); the
# original had them swapped. fitted() replaces `$fitted`, which only works
# via partial matching of `fitted.values`.
lines(hunger$Year[hunger$Sex == "Male"], fitted(lmM), col = "red", lwd = 3)
lines(hunger$Year[hunger$Sex == "Female"], fitted(lmF), col = "black", lwd = 3)


Two lines, same slope

\[Hu_i = b_0 + b_1 \mathbb{1}(Sex_i="Male") + b_2 Y_i + e^*_i\]

\(b_0\) - percent hungry at year zero for females

\(b_0 + b_1\) - percent hungry at year zero for males

\(b_2\) - change in percent hungry (for either males or females) in one year

\(e^*_i\) - everything we didn’t measure


Two lines, same slope in R

# One model with a common Year slope and a shift for Sex, drawn as two
# parallel lines. Relies on the hunger data frame loaded earlier.
lmBoth <- lm(hunger$Numeric ~ hunger$Year + hunger$Sex)
plot(hunger$Year, hunger$Numeric, pch = 19)
# Points: colour index 2 for "Male", colour index 1 otherwise.
points(hunger$Year, hunger$Numeric, pch = 19, col = ((hunger$Sex == "Male") * 1 + 1))
# coef() replaces `$coeff`, which only works via partial matching.
# Line colours now follow the point colours; the original had them swapped.
# Baseline line: coefficient 1 as intercept, coefficient 2 as slope.
abline(a = coef(lmBoth)[1], b = coef(lmBoth)[2], col = "black", lwd = 3)
# Shifted line: the Sex coefficient (3) is added to the intercept.
# NOTE(review): assumes coefficient 3 is the Male indicator -- confirm.
abline(a = coef(lmBoth)[1] + coef(lmBoth)[3], b = coef(lmBoth)[2],
       col = "red", lwd = 3)


Two lines, different slopes (interactions)

\[Hu_i = b_0 + b_1 \mathbb{1}(Sex_i="Male") + b_2 Y_i + b_3 \mathbb{1}(Sex_i="Male")\times Y_i + e^+_i\]

\(b_0\) - percent hungry at year zero for females

\(b_0 + b_1\) - percent hungry at year zero for males

\(b_2\) - change in percent hungry (females) in one year

\(b_2 + b_3\) - change in percent hungry (males) in one year

\(e^+_i\) - everything we didn’t measure


Two lines, different slopes in R

# Model with a Year-by-Sex interaction, drawn as two lines that may differ in
# both intercept and slope. Relies on the hunger data frame loaded earlier.
lmBoth <- lm(hunger$Numeric ~ hunger$Year + hunger$Sex + hunger$Sex*hunger$Year)
plot(hunger$Year, hunger$Numeric, pch = 19)
# Points: colour index 2 for "Male", colour index 1 otherwise.
points(hunger$Year, hunger$Numeric, pch = 19, col = ((hunger$Sex == "Male") * 1 + 1))
# coef() replaces `$coeff`, which only works via partial matching.
# Line colours now follow the point colours; the original had them swapped.
# Baseline line: coefficient 1 as intercept, coefficient 2 as slope.
abline(a = coef(lmBoth)[1], b = coef(lmBoth)[2], col = "black", lwd = 3)
# Second line: coefficient 3 is added to the intercept and coefficient 4
# (the interaction) to the slope.
# NOTE(review): assumes coefficients 3 and 4 are the Male terms -- confirm.
abline(a = coef(lmBoth)[1] + coef(lmBoth)[3],
       b = coef(lmBoth)[2] + coef(lmBoth)[4],
       col = "red", lwd = 3)


Two lines, different slopes in R

# Print the full summary of the model currently stored in lmBoth.
summary(lmBoth)
## 
## Call:
## lm(formula = hunger$Numeric ~ hunger$Year + hunger$Sex + hunger$Sex * 
##     hunger$Year)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -25.913 -11.248  -1.853   7.087  46.146 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                603.50580  171.05519   3.528 0.000439 ***
## hunger$Year                 -0.29340    0.08547  -3.433 0.000623 ***
## hunger$SexMale              61.94772  241.90858   0.256 0.797946    
## hunger$Year:hunger$SexMale  -0.03000    0.12087  -0.248 0.804022    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 13.21 on 944 degrees of freedom
## Multiple R-squared:  0.03181,    Adjusted R-squared:  0.02874 
## F-statistic: 10.34 on 3 and 944 DF,  p-value: 1.064e-06

Interpreting a continuous interaction

\[ E[Y_i | X_{1i}=x_1, X_{2i}=x_2] = \beta_0 + \beta_1 x_{1} + \beta_2 x_{2} + \beta_3 x_{1}x_{2} \] Holding \(X_2\) constant we have \[ E[Y_i | X_{1i}=x_1+1, X_{2i}=x_2]-E[Y_i | X_{1i}=x_1, X_{2i}=x_2] = \beta_1 + \beta_3 x_{2} \] And thus the expected change in \(Y\) per unit change in \(X_1\) holding all else constant is not constant. \(\beta_1\) is the slope when \(x_{2} = 0\). Note further that: \[ E[Y_i | X_{1i}=x_1+1, X_{2i}=x_2+1]-E[Y_i | X_{1i}=x_1, X_{2i}=x_2+1] \] \[ -\left(E[Y_i | X_{1i}=x_1+1, X_{2i}=x_2]-E[Y_i | X_{1i}=x_1, X_{2i}=x_2]\right) \] \[ =\beta_3 \] Thus, \(\beta_3\) is the change in the expected change in \(Y\) per unit change in \(X_1\), per unit change in \(X_2\).

Or, the change in the slope relating \(X_1\) and \(Y\) per unit change in \(X_2\).


Example

\[Hu_i = b_0 + b_1 In_i + b_2 Y_i + b_3 In_i \times Y_i + e^+_i\]

\(b_0\) - percent hungry at year zero for children whose parents have no income

\(b_1\) - change in percent hungry for each dollar of income in year zero

\(b_2\) - change in percent hungry in one year for children whose parents have no income

\(b_3\) - increased change in percent hungry by year for each dollar of income - e.g. if income is $10,000, then change in percent hungry in one year will be

\[b_2 + 1e4 \times b_3\]

\(e^+_i\) - everything we didn’t measure

Lots of care/caution needed!


Multivariable regression Adjustment

Consider the following simulated data

Code for the first plot, rest omitted (See the git repo for the rest of the code.)


Simulation 1

# Simulation 1: both groups draw x from the same uniform(0, 1); the second
# group's response is shifted by tau = 1.
n <- 100
t <- rep(c(0, 1), each = n / 2)
x <- c(runif(n / 2), runif(n / 2))
beta0 <- 0
beta1 <- 2
tau <- 1
sigma <- 0.2
y <- beta0 + x * beta1 + t * tau + rnorm(n, sd = sigma)
# Empty canvas first; the points are added last, after all the lines.
plot(x, y, type = "n", frame = FALSE)
# Fit that ignores group.
abline(lm(y ~ x), lwd = 2)
# Horizontal lines at each group's mean response.
abline(h = mean(y[t == 0]), lwd = 3)
abline(h = mean(y[t == 1]), lwd = 3)
# Parallel lines from the model that includes the group indicator.
fit <- lm(y ~ x + t)
abline(coef(fit)[1], coef(fit)[2], lwd = 3)
abline(coef(fit)[1] + coef(fit)[3], coef(fit)[2], lwd = 3)
points(x[t == 0], y[t == 0], pch = 21, col = "black", bg = "lightblue", cex = 2)
points(x[t == 1], y[t == 1], pch = 21, col = "black", bg = "salmon", cex = 2)


Discussion

Some things to note in this simulation

  • The X variable is unrelated to group status
  • The X variable is related to Y, but the intercept depends on group status.
  • The group variable is related to Y.
  • The relationship between group status and Y is constant; it does not depend on X.
  • The relationship between group and Y disregarding X is about the same as holding X constant

Simulation 2

# Simulation 2: the second group's x is shifted up by 1.5, so the two groups
# do not overlap in x; tau = 0, so group adds nothing to y.
n <- 100
t <- rep(c(0, 1), each = n / 2)
x <- c(runif(n / 2), 1.5 + runif(n / 2))
beta0 <- 0
beta1 <- 2
tau <- 0
sigma <- 0.2
y <- beta0 + x * beta1 + t * tau + rnorm(n, sd = sigma)
# Empty canvas first; the points are added last, after all the lines.
plot(x, y, type = "n", frame = FALSE)
# Fit that ignores group.
abline(lm(y ~ x), lwd = 2)
# Horizontal lines at each group's mean response.
abline(h = mean(y[t == 0]), lwd = 3)
abline(h = mean(y[t == 1]), lwd = 3)
# Parallel lines from the model that includes the group indicator.
fit <- lm(y ~ x + t)
abline(coef(fit)[1], coef(fit)[2], lwd = 3)
abline(coef(fit)[1] + coef(fit)[3], coef(fit)[2], lwd = 3)
points(x[t == 0], y[t == 0], pch = 21, col = "black", bg = "lightblue", cex = 2)
points(x[t == 1], y[t == 1], pch = 21, col = "black", bg = "salmon", cex = 2)


Discussion

Some things to note in this simulation

  • The X variable is highly related to group status
  • The X variable is related to Y, the intercept doesn’t depend on the group variable.
  • The X variable remains related to Y holding group status constant
  • The group variable is marginally related to Y disregarding X.
  • The model would estimate no adjusted effect due to group.
  • There isn’t any data to inform the relationship between group and Y.
  • This conclusion is entirely based on the model.

Simulation 3

# Simulation 3: the second group's x is shifted up by 0.9, leaving only a
# narrow overlap in x, and its response is shifted by tau = -1.
n <- 100
t <- rep(c(0, 1), each = n / 2)
x <- c(runif(n / 2), 0.9 + runif(n / 2))
beta0 <- 0
beta1 <- 2
tau <- -1
sigma <- 0.2
y <- beta0 + x * beta1 + t * tau + rnorm(n, sd = sigma)
# Empty canvas first; the points are added last, after all the lines.
plot(x, y, type = "n", frame = FALSE)
# Fit that ignores group.
abline(lm(y ~ x), lwd = 2)
# Horizontal lines at each group's mean response.
abline(h = mean(y[t == 0]), lwd = 3)
abline(h = mean(y[t == 1]), lwd = 3)
# Parallel lines from the model that includes the group indicator.
fit <- lm(y ~ x + t)
abline(coef(fit)[1], coef(fit)[2], lwd = 3)
abline(coef(fit)[1] + coef(fit)[3], coef(fit)[2], lwd = 3)
points(x[t == 0], y[t == 0], pch = 21, col = "black", bg = "lightblue", cex = 2)
points(x[t == 1], y[t == 1], pch = 21, col = "black", bg = "salmon", cex = 2)


Discussion

Some things to note in this simulation

  • Marginal association has red group higher than blue.
  • Adjusted relationship has blue group higher than red.
  • Group status related to X.
  • There is some direct evidence for comparing red and blue holding X fixed.

Simulation 4

# Simulation 4: the first group's x values are shifted up by 0.5 and the
# group indicator raises y by 1 (tau = 1).
n <- 100
t <- rep(c(0, 1), each = n / 2)
x <- c(.5 + runif(n / 2), runif(n / 2))
beta0 <- 0
beta1 <- 2
tau <- 1
sigma <- .2
y <- beta0 + x * beta1 + t * tau + rnorm(n, sd = sigma)

grp0 <- 1 : (n / 2)        # observations with t == 0
grp1 <- (n / 2 + 1) : n    # observations with t == 1

plot(x, y, type = "n", frame = FALSE)
abline(lm(y ~ x), lwd = 2)              # fit that ignores the group
abline(h = mean(y[grp0]), lwd = 3)      # per-group means of y
abline(h = mean(y[grp1]), lwd = 3)
fit <- lm(y ~ x + t)
b <- coef(fit)
abline(b[1], b[2], lwd = 3)             # fitted line for t == 0
abline(b[1] + b[3], b[2], lwd = 3)      # fitted line for t == 1
points(x[grp0], y[grp0], pch = 21, col = "black", bg = "lightblue", cex = 2)
points(x[grp1], y[grp1], pch = 21, col = "black", bg = "salmon", cex = 2)


Discussion

Some things to note in this simulation

  • No marginal association between group status and Y.
  • Strong adjusted relationship.
  • Group status not related to X.
  • There is lots of direct evidence for comparing red and blue holding X fixed.

Simulation 5

# Simulation 5: both groups draw x uniformly on [-1, 1]; the group indicator
# changes the slope through the t * x interaction term (tau1 = -4) while the
# main group effect tau is 0.
n <- 100
t <- rep(c(0, 1), each = n / 2)
x <- c(runif(n / 2, -1, 1), runif(n / 2, -1, 1))
beta0 <- 0
beta1 <- 2
tau <- 0
tau1 <- -4
sigma <- .2
y <- beta0 + x * beta1 + t * tau + t * x * tau1 + rnorm(n, sd = sigma)

grp0 <- 1 : (n / 2)        # observations with t == 0
grp1 <- (n / 2 + 1) : n    # observations with t == 1

plot(x, y, type = "n", frame = FALSE)
abline(lm(y ~ x), lwd = 2)                     # fit that ignores the group
abline(h = mean(y[grp0]), lwd = 3)             # per-group means of y
abline(h = mean(y[grp1]), lwd = 3)
fit <- lm(y ~ x + t + I(x * t))
b <- coef(fit)
abline(b[1], b[2], lwd = 3)                    # fitted line for t == 0
abline(b[1] + b[3], b[2] + b[4], lwd = 3)      # fitted line for t == 1
points(x[grp0], y[grp0], pch = 21, col = "black", bg = "lightblue", cex = 2)
points(x[grp1], y[grp1], pch = 21, col = "black", bg = "salmon", cex = 2)


Discussion

Some things to note from this simulation

  • There is no such thing as a group effect here.
  • The impact of group reverses itself depending on X.
  • Both intercept and slope depends on group.
  • Group status and X unrelated.
  • There’s lots of information about group effects holding X fixed.

Simulation 6

# Simulation 6: two continuous regressors. With p = 1 the x2 term drops out
# of x1, so x1 is generated without reference to x2.
p <- 1
n <- 100
x2 <- runif(n)
x1 <- p * runif(n) - (1 - p) * x2
beta0 <- 0
beta1 <- 1
tau <- 4
sigma <- .01
y <- beta0 + x1 * beta1 + tau * x2 + rnorm(n, sd = sigma)

plot(x1, y, type = "n", frame = FALSE)
abline(lm(y ~ x1), lwd = 2)
# Colour each point by x2: map x2 in [0, 1] onto palette positions 1..n.
co.pal <- heat.colors(n)
shade <- round((n - 1) * x2 + 1)
points(x1, y, pch = 21, col = "black", bg = co.pal[shade], cex = 2)


Do this to investigate the bivariate relationship

# Draw y against both regressors in three dimensions.
# NOTE(review): plot3d() is presumably provided by the rgl package attached
# on the next line; rgl must be installed for this to run - confirm.
library(rgl)
plot3d(x1, x2, y)

Residual relationship

# Residual-versus-residual plot for x1: both x1 and y are first regressed on
# x2, and the two sets of residuals are plotted against each other.
res_x1 <- resid(lm(x1 ~ x2))   # horizontal axis
res_y <- resid(lm(y ~ x2))     # vertical axis
plot(res_x1, res_y, frame = FALSE, col = "black", bg = "lightblue", pch = 21, cex = 2,
     xlab = "resid(lm(x1 ~ x2))", ylab = "resid(lm(y ~ x2))")
# The overlaid line must regress the vertical-axis variable on the
# horizontal-axis variable; fitting it the other way round draws the line
# with the wrong intercept/slope for these axes.
abline(lm(res_y ~ res_x1), lwd = 2)


Discussion

Some things to note from this simulation

  • X1 unrelated to X2
  • X2 strongly related to Y
  • Adjusted relationship between X1 and Y largely unchanged by considering X2.
  • Almost no residual variability after accounting for X2.

Some final thoughts

  • Modeling multivariate relationships is difficult.
  • Play around with simulations to see how the inclusion or exclusion of another variable can change analyses.
  • The results of these analyses deal with the impact of variables on associations.
  • Ascertaining mechanisms or cause are difficult subjects to be added on top of difficulty in understanding multivariate associations.

Residuals, diagnostics, variation

The linear model

  • Specified as \(Y_i = \sum_{k=1}^p X_{ik} \beta_k + \epsilon_{i}\)
  • We’ll also assume here that \(\epsilon_i \stackrel{iid}{\sim} N(0, \sigma^2)\)
  • Define the residuals as \(e_i = Y_i - \hat Y_i = Y_i - \sum_{k=1}^p X_{ik} \hat \beta_k\)
  • Our estimate of residual variation is \(\hat \sigma^2 = \frac{\sum_{i=1}^n e_i^2}{n-p}\), the \(n-p\) so that \(E[\hat \sigma^2] = \sigma^2\)

# Regress Fertility on every other column of swiss and draw the default
# plot() output for the fitted model on a 2 x 2 panel layout.
data(swiss)
par(mfrow = c(2, 2))
fit <- lm(Fertility ~ . , data = swiss)
plot(fit)


Influential, high leverage and outlying points

# Cloud of points around the line y = x, plus four orange reference points
# placed at (0, 0), (0, 5), (5, 5) and (5, 0). The regression line is fitted
# to the cloud only.
n <- 100
x <- rnorm(n)
y <- x + rnorm(n, sd = .3)
plot(c(-3, 6), c(-3, 6), type = "n", frame = FALSE, xlab = "X", ylab = "Y")
abline(lm(y ~ x), lwd = 2)
points(x, y, cex = 2, bg = "lightblue", col = "black", pch = 21)
points(c(0, 0, 5, 5), c(0, 5, 5, 0), cex = 2, bg = "darkorange", col = "black", pch = 21)


Summary of the plot

Calling a point an outlier is vague.

  • Outliers can be the result of spurious or real processes.
  • Outliers can have varying degrees of influence.
  • Outliers can conform to the regression relationship (i.e. being marginally outlying in X or Y, but not outlying given the regression relationship).
  • Upper left hand point has low leverage, low influence, and is outlying in a way not conforming to the regression relationship.
  • Lower left hand point has low leverage, low influence and is not an outlier in any sense.
  • Upper right hand point has high leverage, but chooses not to exert it and thus would have low actual influence by conforming to the regression relationship of the other points.
  • Lower right hand point has high leverage and would exert it if it were included in the fit.


Influence measures

  • Do ?influence.measures to see the full suite of influence measures in stats. The measures include
  • rstandard - standardized residuals (residuals divided by their standard deviations)
  • rstudent - standardized residuals, residuals divided by their standard deviations, where the ith data point was deleted in the calculation of the standard deviation for the residual to follow a t distribution
  • hatvalues - measures of leverage
  • dffits - change in the predicted response when the \(i^{th}\) point is deleted in fitting the model.
  • dfbetas - change in individual coefficients when the \(i^{th}\) point is deleted in fitting the model.
  • cooks.distance - overall change in the coefficients when the \(i^{th}\) point is deleted.
  • resid - returns the ordinary residuals
  • resid(fit) / (1 - hatvalues(fit)) where fit is the linear model fit returns the PRESS residuals, i.e. the leave one out cross validation residuals - the difference in the response and the predicted response at data point \(i\), where it was not included in the model fitting.

How do I use all of these things?

  • Be wary of simplistic rules for diagnostic plots and measures. The use of these tools is context specific. It’s better to understand what they are trying to accomplish and use them judiciously.
  • Not all of the measures have meaningful absolute scales. You can look at them relative to the values across the data.
  • They probe your data in different ways to diagnose different problems.
  • Patterns in your residual plots generally indicate some poor aspect of model fit. These can include:
  • Heteroskedasticity (non constant variance).
  • Missing model terms.
  • Temporal patterns (plot residuals versus collection order).
  • Residual QQ plots investigate normality of the errors.
  • Leverage measures (hat values) can be useful for diagnosing data entry errors.
  • Influence measures get to the bottom line, ‘how does deleting or including this point impact a particular aspect of the model’.

Case 1

# Case 1: n draws of unrelated noise in x and y, with one extra point at
# (10, 10) placed first in both vectors.
x <- c(10, rnorm(n))
y <- c(10, rnorm(n))
plot(x, y, frame = FALSE, cex = 2, pch = 21, bg = "lightblue", col = "black")
abline(lm(y ~ x))

  • The point c(10, 10) has created a strong regression relationship where there shouldn’t be one.

Showing a couple of the diagnostic values

# Slope dfbetas and hat values for the first ten observations
# (observation 1 is the added point).
fit <- lm(y ~ x)
round(head(dfbetas(fit)[, 2], 10), 3)
##      1      2      3      4      5      6      7      8      9     10 
##  7.381 -0.048 -0.019 -0.080 -0.032 -0.003 -0.031 -0.019  0.015  0.038
round(head(hatvalues(fit), 10), 3)
##     1     2     3     4     5     6     7     8     9    10 
## 0.496 0.038 0.010 0.013 0.012 0.010 0.011 0.011 0.024 0.015

Case 2

# Case 2: points scattered around y = x, with one extra point at (5, 5)
# placed first in both vectors.
base_x <- rnorm(n)
base_y <- base_x + rnorm(n, sd = .3)
x <- c(5, base_x)
y <- c(5, base_y)
plot(x, y, frame = FALSE, cex = 2, pch = 21, bg = "lightblue", col = "black")
fit2 <- lm(y ~ x)
abline(fit2)


Looking at some of the diagnostics

# Slope dfbetas and hat values for the first ten observations of fit2
# (observation 1 is the added point).
round(head(dfbetas(fit2)[, 2], 10), 3)
##      1      2      3      4      5      6      7      8      9     10 
##  0.072 -0.002 -0.001 -0.094  0.021  0.055 -0.033  0.124  0.003  0.012
round(head(hatvalues(fit2), 10), 3)
##     1     2     3     4     5     6     7     8     9    10 
## 0.204 0.010 0.010 0.014 0.010 0.019 0.017 0.016 0.012 0.010

Example described by Stefanski TAS 2007 Vol 61.

## Don't everyone hit this server at once.  Read the paper first.
# Read the headerless table from the remote text file and show all pairwise
# scatterplots of its columns.
owl_url <- "http://www4.stat.ncsu.edu/~stefanski/NSF_Supported/Hidden_Images/orly_owl_files/orly_owl_Lin_4p_5_flat.txt"
dat <- read.table(owl_url, header = FALSE)
pairs(dat)


Got our P-values, should we bother to do a residual plot?

# Coefficient table for V1 regressed on all other columns, without an
# intercept.
owl_fit <- lm(V1 ~ . - 1, data = dat)
coef(summary(owl_fit))
##     Estimate Std. Error   t value     Pr(>|t|)
## V2 0.9856157 0.12798121  7.701253 1.989126e-14
## V3 0.9714707 0.12663829  7.671225 2.500259e-14
## V4 0.8606368 0.11958267  7.197003 8.301184e-13
## V5 0.9266981 0.08328434 11.126919 4.778110e-28

Residual plot

P-values significant, O RLY?

# Refit the no-intercept model and plot residuals against fitted values,
# using the smallest plotting symbol so the point pattern is visible.
fit <- lm(V1 ~ . - 1, data = dat)
plot(predict(fit), resid(fit), pch = ".")


Multiple variables

Multivariable regression

  • We have an entire class on prediction and machine learning, so we’ll focus on modeling.
  • Prediction has a different set of criteria, needs for interpretability and standards for generalizability.
  • In modeling, our interest lies in parsimonious, interpretable representations of the data that enhance our understanding of the phenomena under study.
  • A model is a lens through which to look at your data. (I attribute this quote to Scott Zeger)
  • Under this philosophy, what’s the right model? Whatever model connects the data to a true, parsimonious statement about what you’re studying.
  • There are nearly uncountable ways that a model can be wrong; in this lecture, we’ll focus on variable inclusion and exclusion.
  • Like nearly all aspects of statistics, good modeling decisions are context dependent.
  • A good model for prediction versus one for studying mechanisms versus one for trying to establish causal effects may not be the same.

The Rumsfeldian triplet

There are known knowns. These are things we know that we know. There are known unknowns. That is to say, there are things that we know we don’t know. But there are also unknown unknowns. There are things we don’t know we don’t know. Donald Rumsfeld

In our context

  • (Known knowns) Regressors that we know we should check to include in the model and have.
  • (Known Unknowns) Regressors that we would like to include in the model, but don’t have.
  • (Unknown Unknowns) Regressors that we don’t even know about that we should have included in the model.


General rules

  • Omitting variables results in bias in the coefficients of interest - unless their regressors are uncorrelated with the omitted ones.
  • This is why we randomize treatments, it attempts to uncorrelate our treatment indicator with variables that we don’t have to put in the model.
  • (If there’s too many unobserved confounding variables, even randomization won’t help you.)
  • Including variables that we shouldn’t have increases standard errors of the regression variables.
  • Actually, including any new variables increases (actual, not estimated) standard errors of other regressors. So we don’t want to idly throw variables into the model.
  • The model must tend toward perfect fit as the number of non-redundant regressors approaches \(n\).
  • \(R^2\) increases monotonically as more regressors are included.
  • The SSE decreases monotonically as more regressors are included.

Plot of \(R^2\) versus \(n\)

Simulations in which the number of variables included increases up to \(n=100\). No actual regression relationship exists in any simulation.

 # R^2 of regressions of pure noise on p pure-noise regressors, for p = 1..n.
n <- 100
plot(c(1, n), 0 : 1, type = "n", frame = FALSE, xlab = "p", ylab = "R^2")
# R^2 from one simulated data set with p regressors; y and x are drawn
# independently, so there is no true relationship.
r_squared_for <- function(p) {
  y <- rnorm(n)
  x <- matrix(rnorm(n * p), n, p)
  summary(lm(y ~ x))$r.squared
}
r <- vapply(1 : n, r_squared_for, numeric(1))
lines(1 : n, r, lwd = 2)
abline(h = 1)


Variance inflation 1

# Variance inflation 1: x2 and x3 are drawn independently of x1.
n <- 100
nosim <- 1000
x1 <- rnorm(n)
x2 <- rnorm(n)
x3 <- rnorm(n)
# One replicate: simulate a new response that depends on x1 only, then return
# the estimated x1 coefficient under three nested models.
x1_slopes <- function(i) {
  y <- x1 + rnorm(n, sd = .3)
  c(coef(lm(y ~ x1))[2],
    coef(lm(y ~ x1 + x2))[2],
    coef(lm(y ~ x1 + x2 + x3))[2])
}
betas <- vapply(1 : nosim, x1_slopes, numeric(3))
# Standard deviation of the x1 estimate across replicates, one per model.
round(apply(betas, 1, sd), 5)
##      x1      x1      x1 
## 0.02848 0.02847 0.02846

Variance inflation 2

# Variance inflation 2: x2 and x3 are built from x1 plus independent noise,
# with x3 weighted 0.95 on x1.
n <- 100
nosim <- 1000
x1 <- rnorm(n)
x2 <- x1 / sqrt(2) + rnorm(n) / sqrt(2)
x3 <- x1 * 0.95 + rnorm(n) * sqrt(1 - 0.95^2)
# One replicate: simulate a new response that depends on x1 only, then return
# the estimated x1 coefficient under three nested models.
x1_slopes <- function(i) {
  y <- x1 + rnorm(n, sd = .3)
  c(coef(lm(y ~ x1))[2],
    coef(lm(y ~ x1 + x2))[2],
    coef(lm(y ~ x1 + x2 + x3))[2])
}
betas <- vapply(1 : nosim, x1_slopes, numeric(3))
# Standard deviation of the x1 estimate across replicates, one per model.
round(apply(betas, 1, sd), 5)
##      x1      x1      x1 
## 0.03242 0.04794 0.09659

Variance inflation factors

  • Notice variance inflation was much worse when we included a variable that was highly related to x1.
  • We don’t know \(\sigma\), so we can only estimate the increase in the actual standard error of the coefficients for including a regressor.
  • However, \(\sigma\) drops out of the relative standard errors. If one sequentially adds variables, one can check the variance (or sd) inflation for including each one.
  • When the other regressors are actually orthogonal to the regressor of interest, then there is no variance inflation.
  • The variance inflation factor (VIF) is the increase in the variance for the ith regressor compared to the ideal setting where it is orthogonal to the other regressors.
  • (The square root of the VIF is the increase in the sd …)
  • Remember, variance inflation is only part of the picture. We want to include certain variables, even if they dramatically inflate our variance.

Revisiting our previous simulation

## The ratios below do not depend on which y you use.
y <- x1 + rnorm(n, sd = .3)
# Unscaled variance of the x1 coefficient when x1 is the only regressor.
a <- summary(lm(y ~ x1))$cov.unscaled[2, 2]
# The same quantity after adding x2, and then x2 and x3.
var_with_x2 <- summary(lm(y ~ x1 + x2))$cov.unscaled[2, 2]
var_with_x2_x3 <- summary(lm(y ~ x1 + x2 + x3))$cov.unscaled[2, 2]
c(var_with_x2, var_with_x2_x3) / a
## [1] 2.126855 9.566076
# Empirical counterpart from the simulated coefficients in betas.
temp <- apply(betas, 1, var)
temp[2 : 3] / temp[1]
##       x1       x1 
## 2.185780 8.875443

Swiss data

# Variance inflation of the Agriculture coefficient as Examination and then
# Education are added to the model.
data(swiss)
fit1 <- lm(Fertility ~ Agriculture, data = swiss)
a <- summary(fit1)$cov.unscaled[2, 2]
fit2 <- update(fit1, . ~ . + Examination)
fit3 <- update(fit2, . ~ . + Education)
# Unscaled variance of the Agriculture coefficient in a fitted model.
agri_var <- function(model) summary(model)$cov.unscaled[2, 2]
c(agri_var(fit2), agri_var(fit3)) / a
## [1] 1.891576 2.089159

Swiss data VIFs,

library(car)
# Variance inflation factors for every regressor in the full swiss model,
# followed by their square roots.
fit <- lm(Fertility ~ . , data = swiss)
swiss_vif <- vif(fit)
swiss_vif
##      Agriculture      Examination        Education         Catholic 
##         2.284129         3.675420         2.774943         1.937160 
## Infant.Mortality 
##         1.107542
sqrt(swiss_vif) # I prefer sd
##      Agriculture      Examination        Education         Catholic 
##         1.511334         1.917138         1.665816         1.391819 
## Infant.Mortality 
##         1.052398

What about residual variance estimation?

  • Assuming that the model is linear with additive iid errors (with finite variance), we can mathematically describe the impact of omitting necessary variables or including unnecessary ones.
  • If we underfit the model, the variance estimate is biased.
  • If we correctly or overfit the model, including all necessary covariates and/or unnecessary covariates, the variance estimate is unbiased.
    • However, the variance of the variance is larger if we include unnecessary variables.

Covariate model selection

  • Automated covariate selection is a difficult topic. It depends heavily on how rich of a covariate space one wants to explore.
  • The space of models explodes quickly as you add interactions and polynomial terms.
  • In the prediction class, we’ll cover many modern methods for traversing large model spaces for the purposes of prediction.
  • Principal components or factor analytic models on covariates are often useful for reducing complex covariate spaces.
  • Good design can often eliminate the need for complex model searches at analyses; though often control over the design is limited.
  • If the models of interest are nested and without lots of parameters differentiating them, it’s fairly uncontroversial to use nested likelihood ratio tests. (Example to follow.)
  • My favorite approach is as follows. Given a coefficient that I’m interested in, I like to use covariate adjustment and multiple models to probe that effect to evaluate it for robustness and to see what other covariates knock it out. This isn’t a terribly systematic approach, but it tends to teach you a lot about the data as you get your hands dirty.

How to do nested model testing in R

# Nested model comparison on the swiss data: each model adds regressors to the
# previous one and anova() tests the successive additions.
# fit3 and fit5 are built by updating fit1, so this snippet only needs the
# swiss data and does not rely on a separately defined `fit` object.
fit1 <- lm(Fertility ~ Agriculture, data = swiss)
fit3 <- update(fit1, Fertility ~ Agriculture + Examination + Education)
fit5 <- update(fit1, Fertility ~ Agriculture + Examination + Education + Catholic + Infant.Mortality)
anova(fit1, fit3, fit5)
## Analysis of Variance Table
## 
## Model 1: Fertility ~ Agriculture
## Model 2: Fertility ~ Agriculture + Examination + Education
## Model 3: Fertility ~ Agriculture + Examination + Education + Catholic + 
##     Infant.Mortality
##   Res.Df    RSS Df Sum of Sq      F    Pr(>F)    
## 1     45 6283.1                                  
## 2     43 3180.9  2    3102.2 30.211 8.638e-09 ***
## 3     41 2105.0  2    1075.9 10.477 0.0002111 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Generalized linear models

Linear models

  • Linear models are the most useful applied statistical technique. However, they are not without their limitations.
  • Additive response models don’t make much sense if the response is discrete, or strictly positive.
  • Additive error models often don’t make sense, for example if the outcome has to be positive.
  • Transformations are often hard to interpret.
    • There’s value in modeling the data on the scale that it was collected.
    • Particularly interpretable transformations, natural logarithms in specific, aren’t applicable for negative or zero values.

Generalized linear models

  • Introduced in a 1972 RSSB paper by Nelder and Wedderburn.
  • Involves three components
  • An exponential family model for the response.
  • A systematic component via a linear predictor.
  • A link function that connects the means of the response to the linear predictor.

Example, linear models

  • Assume that \(Y_i \sim N(\mu_i, \sigma^2)\) (the Gaussian distribution is an exponential family distribution.)
  • Define the linear predictor to be \(\eta_i = \sum_{k=1}^p X_{ik} \beta_k\).
  • The link function as \(g\) so that \(g(\mu) = \eta\).
  • For linear models \(g(\mu) = \mu\) so that \(\mu_i = \eta_i\)
  • This yields the same likelihood model as our additive error Gaussian linear model \[ Y_i = \sum_{k=1}^p X_{ik} \beta_k + \epsilon_{i} \] where \(\epsilon_i \stackrel{iid}{\sim} N(0, \sigma^2)\)

Example, logistic regression

  • Assume that \(Y_i \sim Bernoulli(\mu_i)\) so that \(E[Y_i] = \mu_i\) where \(0\leq \mu_i \leq 1\).
  • Linear predictor \(\eta_i = \sum_{k=1}^p X_{ik} \beta_k\)
  • Link function \(g(\mu) = \eta = \log\left( \frac{\mu}{1 - \mu}\right)\) \(g\) is the (natural) log odds, referred to as the logit.
  • Note then we can invert the logit function as \[ \mu_i = \frac{\exp(\eta_i)}{1 + \exp(\eta_i)} ~~~\mbox{and}~~~ 1 - \mu_i = \frac{1}{1 + \exp(\eta_i)} \] Thus the likelihood is \[ \prod_{i=1}^n \mu_i^{y_i} (1 - \mu_i)^{1-y_i} = \exp\left(\sum_{i=1}^n y_i \eta_i \right) \prod_{i=1}^n (1 + \exp(\eta_i))^{-1} \]

Example, Poisson regression

  • Assume that \(Y_i \sim Poisson(\mu_i)\) so that \(E[Y_i] = \mu_i\) where \(0\leq \mu_i\)
  • Linear predictor \(\eta_i = \sum_{k=1}^p X_{ik} \beta_k\)
  • Link function \(g(\mu) = \eta = \log(\mu)\)
  • Recall that \(e^x\) is the inverse of \(\log(x)\) so that \[ \mu_i = e^{\eta_i} \] Thus, the likelihood is \[ \prod_{i=1}^n (y_i !)^{-1} \mu_i^{y_i}e^{-\mu_i} \propto \exp\left(\sum_{i=1}^n y_i \eta_i - \sum_{i=1}^n \mu_i\right) \]

Some things to note

  • In each case, the only way in which the likelihood depends on the data is through \[\sum_{i=1}^n y_i \eta_i = \sum_{i=1}^n y_i\sum_{k=1}^p X_{ik} \beta_k = \sum_{k=1}^p \beta_k\sum_{i=1}^n X_{ik} y_i \] Thus we don’t need the full data, only \(\sum_{i=1}^n X_{ik} y_i\). This simplification is a consequence of choosing so-called ‘canonical’ link functions.
  • (This has to be derived). All models achieve their maximum at the root of the so called normal equations \[ 0=\sum_{i=1}^n \frac{(Y_i - \mu_i)}{Var(Y_i)}W_i \] where \(W_i\) are the derivative of the inverse of the link function.

About variances

\[ 0=\sum_{i=1}^n \frac{(Y_i - \mu_i)}{Var(Y_i)}W_i \]

  • For the linear model \(Var(Y_i) = \sigma^2\) is constant.
  • For Bernoulli case \(Var(Y_i) = \mu_i (1 - \mu_i)\)
  • For the Poisson case \(Var(Y_i) = \mu_i\).
  • In the latter cases, it is often relevant to have a more flexible variance model, even if it doesn’t correspond to an actual likelihood \[ 0=\sum_{i=1}^n \frac{(Y_i - \mu_i)}{\phi \mu_i (1 - \mu_i ) } W_i ~~~\mbox{and}~~~ 0=\sum_{i=1}^n \frac{(Y_i - \mu_i)}{\phi \mu_i} W_i \]
  • These are called ‘quasi-likelihood’ normal equations


Odds and ends

  • The normal equations have to be solved iteratively. Resulting in \(\hat \beta_k\) and, if included, \(\hat \phi\).
  • Predicted linear predictor responses can be obtained as \(\hat \eta = \sum_{k=1}^p X_k \hat \beta_k\)
  • Predicted mean responses as \(\hat \mu = g^{-1}(\hat \eta)\)
  • Coefficients are interpreted as \[ g(E[Y | X_k = x_k + 1, X_{\sim k} = x_{\sim k}]) - g(E[Y | X_k = x_k, X_{\sim k}=x_{\sim k}]) = \beta_k \] or the change in the link function of the expected response per unit change in \(X_k\) holding other regressors constant.
  • Variations on the Newton/Raphson algorithm are used to solve the normal equations.
  • Asymptotics are used for inference usually.
  • Many of the ideas from linear models can be brought over to GLMs.

Generalized linear models, binary data

Key ideas

  • Frequently we care about outcomes that have two values
  • Alive/dead
  • Win/loss
  • Success/Failure
  • etc
  • Called binary, Bernoulli or 0/1 outcomes
  • Collection of exchangeable binary outcomes for the same covariate data are called binomial outcomes.

Example Baltimore Ravens win/loss

Ravens Data

# Download the Ravens results file into the working directory, load it, and
# preview the first rows of ravensData.
ravens_url <- "https://dl.dropboxusercontent.com/u/7710864/data/ravensData.rda"
ravens_file <- "ravensData.rda"
download.file(ravens_url, destfile = ravens_file)
load(ravens_file)
head(ravensData)
##   ravenWinNum ravenWin ravenScore opponentScore
## 1           1        W         24             9
## 2           1        W         38            35
## 3           1        W         28            13
## 4           1        W         34            31
## 5           1        W         44            13
## 6           0        L         23            24

Linear regression

\[ RW_i = b_0 + b_1 RS_i + e_i \]

\(RW_i\) - 1 if a Ravens win, 0 if not

\(RS_i\) - Number of points Ravens scored

\(b_0\) - probability of a Ravens win if they score 0 points

\(b_1\) - increase in probability of a Ravens win for each additional point

\(e_i\) - residual variation due to everything we didn’t measure


Linear regression in R

# Ordinary linear regression of the 0/1 win indicator on points scored;
# print the coefficient table only.
lmRavens <- lm(ravensData$ravenWinNum ~ ravensData$ravenScore)
coef(summary(lmRavens))
##                         Estimate  Std. Error  t value   Pr(>|t|)
## (Intercept)           0.28503172 0.256643165 1.110615 0.28135043
## ravensData$ravenScore 0.01589917 0.009058997 1.755069 0.09625261

Odds

Binary Outcome 0/1

\[RW_i\]

Probability (0,1)

\[\rm{Pr}(RW_i | RS_i, b_0, b_1 )\]

Odds \((0,\infty)\) \[\frac{\rm{Pr}(RW_i | RS_i, b_0, b_1 )}{1-\rm{Pr}(RW_i | RS_i, b_0, b_1)}\]

Log odds \((-\infty,\infty)\)

\[\log\left(\frac{\rm{Pr}(RW_i | RS_i, b_0, b_1 )}{1-\rm{Pr}(RW_i | RS_i, b_0, b_1)}\right)\]


Linear vs. logistic regression

Linear

\[ RW_i = b_0 + b_1 RS_i + e_i \]

or

\[ E[RW_i | RS_i, b_0, b_1] = b_0 + b_1 RS_i\]

Logistic

\[ \rm{Pr}(RW_i | RS_i, b_0, b_1) = \frac{\exp(b_0 + b_1 RS_i)}{1 + \exp(b_0 + b_1 RS_i)}\]

or

\[ \log\left(\frac{\rm{Pr}(RW_i | RS_i, b_0, b_1 )}{1-\rm{Pr}(RW_i | RS_i, b_0, b_1)}\right) = b_0 + b_1 RS_i \]


Interpreting Logistic Regression

\[ \log\left(\frac{\rm{Pr}(RW_i | RS_i, b_0, b_1 )}{1-\rm{Pr}(RW_i | RS_i, b_0, b_1)}\right) = b_0 + b_1 RS_i \]

\(b_0\) - Log odds of a Ravens win if they score zero points

\(b_1\) - Log odds ratio of win probability for each point scored (compared to zero points)

\(\exp(b_1)\) - Odds ratio of win probability for each point scored (compared to zero points)


Odds

  • Imagine that you are playing a game where you flip a coin with success probability \(p\).
  • If it comes up heads, you win \(X\). If it comes up tails, you lose \(Y\).
  • What should we set \(X\) and \(Y\) for the game to be fair?

    \[E[earnings]= X p - Y (1 - p) = 0\]
  • Implies \[\frac{Y}{X} = \frac{p}{1 - p}\]
  • The odds can be said as “How much should you be willing to pay for a \(p\) probability of winning a dollar?”
    • (If \(p > 0.5\) you have to pay more if you lose than you get if you win.)
    • (If \(p < 0.5\) you have to pay less if you lose than you get if you win.)

Visualizing fitting logistic regression curves

# Grid of 1000 x values on [-10, 10] at which the logistic curve is drawn.
x <- seq(-10, 10, length = 1000)
# Interactive plot of exp(beta0 + beta1 * x) / (1 + exp(beta0 + beta1 * x)),
# with sliders for beta1 (initially 2) and beta0 (initially 0), each ranging
# over [-2, 2] in steps of 0.1.
# NOTE(review): manipulate() and slider() are not base R; presumably they come
# from the manipulate package, which would need to be attached first - confirm.
manipulate(
    plot(x, exp(beta0 + beta1 * x) / (1 + exp(beta0 + beta1 * x)), 
         type = "l", lwd = 3, frame = FALSE),
    beta1 = slider(-2, 2, step = .1, initial = 2),
    beta0 = slider(-2, 2, step = .1, initial = 0)
    )

Ravens logistic regression

# Logistic regression (binomial family) of the 0/1 win indicator on points
# scored. The formula text is echoed in the printed Call and in the
# coefficient names below.
logRegRavens <- glm(ravensData$ravenWinNum ~ ravensData$ravenScore,family="binomial")
summary(logRegRavens)
## 
## Call:
## glm(formula = ravensData$ravenWinNum ~ ravensData$ravenScore, 
##     family = "binomial")
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.7575  -1.0999   0.5305   0.8060   1.4947  
## 
## Coefficients:
##                       Estimate Std. Error z value Pr(>|z|)
## (Intercept)           -1.68001    1.55412  -1.081     0.28
## ravensData$ravenScore  0.10658    0.06674   1.597     0.11
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 24.435  on 19  degrees of freedom
## Residual deviance: 20.895  on 18  degrees of freedom
## AIC: 24.895
## 
## Number of Fisher Scoring iterations: 5

Ravens fitted values

# Fitted win probabilities from the logistic model against points scored.
plot(ravensData$ravenScore, fitted(logRegRavens),
     pch = 19, col = "blue", xlab = "Score", ylab = "Prob Ravens Win")


Odds ratios and confidence intervals

# Exponentiate the logistic coefficients and their confidence limits to move
# from the log-odds scale to the odds scale.
exp(coef(logRegRavens))
##           (Intercept) ravensData$ravenScore 
##             0.1863724             1.1124694
exp(confint(logRegRavens))
## Waiting for profiling to be done...
##                             2.5 %   97.5 %
## (Intercept)           0.005674966 3.106384
## ravensData$ravenScore 0.996229662 1.303304

ANOVA for logistic regression

# Analysis of deviance for the logistic model: terms are added sequentially
# and each reduction in deviance is given a chi-squared p-value.
anova(logRegRavens,test="Chisq")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: ravensData$ravenWinNum
## 
## Terms added sequentially (first to last)
## 
## 
##                       Df Deviance Resid. Df Resid. Dev Pr(>Chi)  
## NULL                                     19     24.435           
## ravensData$ravenScore  1   3.5398        18     20.895  0.05991 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Interpreting Odds Ratios

  • Not probabilities
  • Odds ratio of 1 = no difference in odds
  • Log odds ratio of 0 = no difference in odds
  • Odds ratio < 0.5 or > 2 commonly a “moderate effect”
  • Relative risk \(\frac{\rm{Pr}(RW_i | RS_i = 10)}{\rm{Pr}(RW_i | RS_i = 0)}\) often easier to interpret, harder to estimate
  • For small probabilities RR \(\approx\) OR but they are not the same!

Wikipedia on Odds Ratio


Count outcomes, Poisson GLMs

Key ideas

  • Many data take the form of counts
  • Calls to a call center
  • Number of flu cases in an area
  • Number of cars that cross a bridge
  • Data may also be in the form of rates
  • Percent of children passing a test
  • Percent of hits to a website from a country
  • Linear regression with transformation is an option

Poisson distribution

  • The Poisson distribution is a useful model for counts and rates
  • Here a rate is count per some monitoring time
  • Some example uses of the Poisson distribution
    • Modeling web traffic hits
    • Incidence rates
    • Approximating binomial probabilities with small \(p\) and large \(n\)
    • Analyzing contingency table data

The Poisson mass function

  • \(X \sim Poisson(t\lambda)\) if \[ P(X = x) = \frac{(t\lambda)^x e^{-t\lambda}}{x!} \] For \(x = 0, 1, \ldots\).
  • The mean of the Poisson is \(E[X] = t\lambda\), thus \(E[X / t] = \lambda\)
  • The variance of the Poisson is \(Var(X) = t\lambda\).
  • The Poisson tends to a normal as \(t\lambda\) gets large.

# Three side-by-side plots of the Poisson mass function, drawn as vertical
# lines (type = "h"), for lambda = 2, 10 and 100.
par(mfrow = c(1, 3))
plot(0 : 10, dpois(0 : 10, lambda = 2), type = "h", frame = FALSE)
plot(0 : 20, dpois(0 : 20, lambda = 10), type = "h", frame = FALSE)
plot(0 : 200, dpois(0 : 200, lambda = 100), type = "h", frame = FALSE) 


Poisson distribution

Sort of, showing that the mean and variance are equal

# Numerically sum the Poisson(3) mass function over 0..10000 to obtain its
# mean and variance.
x <- 0 : 10000
lambda <- 3
pmf <- dpois(x, lambda = lambda)
mu <- sum(x * pmf)
sigmasq <- sum((x - mu)^2 * pmf)
c(mu, sigmasq)
## [1] 3 3

Example: Leek Group Website Traffic

  • Consider the daily counts to Jeff Leek’s web site

http://biostat.jhsph.edu/~jleek/

  • Since the unit of time is always one day, set \(t = 1\) and then the Poisson mean is interpreted as web hits per day. (If we set \(t = 24\), it would be web hits per hour).

Website data

# Download the web-traffic file into the working directory, load it, and add
# a numeric day count computed from the date column.
ga_url <- "https://dl.dropboxusercontent.com/u/7710864/data/gaData.rda"
ga_file <- "gaData.rda"
download.file(ga_url, destfile = ga_file)
load(ga_file)
gaData$julian <- julian(gaData$date)
head(gaData)
##         date visits simplystats julian
## 1 2011-01-01      0           0  14975
## 2 2011-01-02      0           0  14976
## 3 2011-01-03      0           0  14977
## 4 2011-01-04      0           0  14978
## 5 2011-01-05      0           0  14979
## 6 2011-01-06      0           0  14980

http://skardhamar.github.com/rga/


Plot data

# Daily visit counts against the day number.
with(gaData,
     plot(julian, visits, pch = 19, col = "darkgrey", xlab = "Julian", ylab = "Visits"))


Linear regression

\[ NH_i = b_0 + b_1 JD_i + e_i \]

\(NH_i\) - number of hits to the website

\(JD_i\) - day of the year (Julian day)

\(b_0\) - number of hits on Julian day 0 (1970-01-01)

\(b_1\) - increase in number of hits per unit day

\(e_i\) - variation due to everything we didn’t measure


Linear regression line

# Scatter of visits against day number with the least-squares line in red.
with(gaData,
     plot(julian, visits, pch = 19, col = "darkgrey", xlab = "Julian", ylab = "Visits"))
lm1 <- lm(gaData$visits ~ gaData$julian)
abline(lm1, col = "red", lwd = 3)


Aside, taking the log of the outcome

  • Taking the natural log of the outcome has a specific interpretation.
  • Consider the model

\[ \log(NH_i) = b_0 + b_1 JD_i + e_i \]

\(NH_i\) - number of hits to the website

\(JD_i\) - day of the year (Julian day)

\(b_0\) - log number of hits on Julian day 0 (1970-01-01)

\(b_1\) - increase in log number of hits per unit day

\(e_i\) - variation due to everything we didn’t measure


Exponentiating coefficients

  • \(e^{E[\log(Y)]}\) geometric mean of \(Y\).
    • With no covariates, this is estimated by \(e^{\frac{1}{n}\sum_{i=1}^n \log(y_i)} = (\prod_{i=1}^n y_i)^{1/n}\)
  • When you take the natural log of outcomes and fit a regression model, your exponentiated coefficients estimate things about geometric means.
  • \(e^{\beta_0}\) estimated geometric mean hits on day 0
  • \(e^{\beta_1}\) estimated relative increase or decrease in geometric mean hits per day
  • There’s a problem with logs when you have zero counts (\(\log(0)\) is undefined); adding a constant to the counts works
# Regress log(visits + 1) on the day index (the +1 keeps zero-visit days
# finite), then exponentiate the coefficients and round to 5 decimals.
round(exp(coef(lm(I(log(gaData$visits + 1)) ~ gaData$julian))), 5)
##   (Intercept) gaData$julian 
##       0.00000       1.00231

Linear vs. Poisson regression

Linear

\[ NH_i = b_0 + b_1 JD_i + e_i \]

or

\[ E[NH_i | JD_i, b_0, b_1] = b_0 + b_1 JD_i\]

Poisson/log-linear

\[ \log\left(E[NH_i | JD_i, b_0, b_1]\right) = b_0 + b_1 JD_i \]

or

\[ E[NH_i | JD_i, b_0, b_1] = \exp\left(b_0 + b_1 JD_i\right) \]


Multiplicative differences



\[ E[NH_i | JD_i, b_0, b_1] = \exp\left(b_0 + b_1 JD_i\right) \]



\[ E[NH_i | JD_i, b_0, b_1] = \exp\left(b_0 \right)\exp\left(b_1 JD_i\right) \]



If \(JD_i\) is increased by one unit, \(E[NH_i | JD_i, b_0, b_1]\) is multiplied by \(\exp\left(b_1\right)\)


Poisson regression in R

# Scatter of visits with two fits overlaid:
#   red  = straight line from the linear model lm1,
#   blue = fitted means from the Poisson regression glm1.
plot(gaData$julian, gaData$visits,
     pch = 19, col = "darkgrey", xlab = "Julian", ylab = "Visits")
glm1 <- glm(gaData$visits ~ gaData$julian, family = "poisson")
abline(lm1, col = "red", lwd = 3)
lines(gaData$julian, fitted(glm1), col = "blue", lwd = 3)


Mean-variance relationship?

# Residuals against fitted means for the Poisson fit. The residuals are read
# from the `$residuals` component stored on the fit (not via residuals(glm1),
# whose default residual type differs).
plot(fitted(glm1), glm1$residuals,
     pch = 19, col = "grey", xlab = "Fitted", ylab = "Residuals")


Model agnostic standard errors

library(sandwich)
# Wald-type confidence intervals built from heteroskedasticity-consistent
# ("sandwich") standard errors rather than the model-based ones.
#
# Args:
#   object: a fitted model that supports coef() and sandwich::vcovHC().
#   parm:   coefficients to report, given as names or numeric positions;
#           all coefficients when omitted.
#   level:  confidence level, default 0.95.
#   ...:    ignored.
#
# Returns:
#   A matrix with one row per requested coefficient and two columns (lower
#   and upper limit) labelled by percentage, e.g. "2.5 %" and "97.5 %".
confint.agnostic <- function (object, parm, level = 0.95, ...)
{
    cf <- coef(object)
    pnames <- names(cf)
    if (missing(parm))
        parm <- pnames
    else if (is.numeric(parm))
        parm <- pnames[parm]
    # Lower and upper tail probabilities for the requested level.
    a <- (1 - level)/2
    a <- c(a, 1 - a)
    # Column labels are formatted inline instead of through the unexported
    # stats:::format.perc helper, so the function does not depend on a
    # private name inside the stats namespace.
    pct <- paste(format(100 * a, trim = TRUE, scientific = FALSE, digits = 3),
                 "%")
    fac <- qnorm(a)
    ci <- array(NA, dim = c(length(parm), 2L), dimnames = list(parm, pct))
    # Robust standard errors: square roots of the sandwich covariance diagonal.
    ses <- sqrt(diag(sandwich::vcovHC(object)))[parm]
    # estimate + se * normal quantile, for both tails at once.
    ci[] <- cf[parm] + ses %o% fac
    ci
}

http://stackoverflow.com/questions/3817182/vcovhc-and-confidence-interval


Estimating confidence intervals

# Model-based confidence intervals for the Poisson fit.
confint(glm1)
## Waiting for profiling to be done...
##                       2.5 %        97.5 %
## (Intercept)   -34.346577587 -31.159715656
## gaData$julian   0.002190043   0.002396461
# Intervals for the same fit based on sandwich (robust) standard errors.
confint.agnostic(glm1)
##                       2.5 %        97.5 %
## (Intercept)   -36.362674594 -29.136997254
## gaData$julian   0.002058147   0.002527955

Rates



\[ E[NHSS_i | JD_i, b_0, b_1]/NH_i = \exp\left(b_0 + b_1 JD_i\right) \]



\[ \log\left(E[NHSS_i | JD_i, b_0, b_1]\right) - \log(NH_i) = b_0 + b_1 JD_i \]



\[ \log\left(E[NHSS_i | JD_i, b_0, b_1]\right) = \log(NH_i) + b_0 + b_1 JD_i \]


Fitting rates in R

# Poisson model for simplystats hits with log(visits + 1) as an offset, so the
# linear predictor describes a rate relative to total visits.
glm2 <- glm(
  gaData$simplystats ~ julian(gaData$date),
  offset = log(visits + 1),
  family = "poisson",
  data = gaData
)
# Blue: fitted counts from glm2. Red: fitted counts from glm1.
plot(julian(gaData$date), fitted(glm2),
     col = "blue", pch = 19, xlab = "Date", ylab = "Fitted Counts")
points(julian(gaData$date), fitted(glm1), col = "red", pch = 19)


Fitting rates in R

# Refit the offset model, then compare observed and fitted rates, both
# expressed per (visits + 1).
glm2 <- glm(
  gaData$simplystats ~ julian(gaData$date),
  offset = log(visits + 1),
  family = "poisson",
  data = gaData
)
# Grey points: observed simplystats / (visits + 1).
plot(julian(gaData$date), gaData$simplystats / (gaData$visits + 1),
     col = "grey", pch = 19, xlab = "Date", ylab = "Fitted Rates")
# Blue line: fitted counts divided by the same denominator.
lines(julian(gaData$date), fitted(glm2) / (gaData$visits + 1),
      col = "blue", lwd = 3)


Hodgepodge

How to fit functions using linear models

  • Consider a model \(Y_i = f(X_i) + \epsilon\).
  • How can we fit such a model using linear models (called scatterplot smoothing)
  • Consider the model \[ Y_i = \beta_0 + \beta_1 X_i + \sum_{k=1}^d (x_i - \xi_k)_+ \gamma_k + \epsilon_{i} \] where \((a)_+ = a\) if \(a > 0\) and \(0\) otherwise and \(\xi_1 \leq ... \leq \xi_d\) are known knot points.
  • Prove to yourself that the mean function \[ \beta_0 + \beta_1 X_i + \sum_{k=1}^d (x_i - \xi_k)_+ \gamma_k \] is continuous at the knot points.

Simulated example

# Simulate a noisy sine wave and fit it with a piecewise-linear spline written
# as an ordinary linear model.
n <- 500
x <- seq(0, 4 * pi, length.out = n)
y <- sin(x) + rnorm(n, sd = .3)
knots <- seq(0, 8 * pi, length.out = 20)
# One hinge column per knot: (x - knot) where x exceeds the knot, 0 elsewhere.
splineTerms <- vapply(knots, function(k) (x > k) * (x - k), numeric(n))
# Design matrix: intercept, x, hinge terms. "- 1" drops lm's own intercept.
xMat <- cbind(1, x, splineTerms)
yhat <- predict(lm(y ~ xMat - 1))
plot(x, y, frame = FALSE, pch = 21, bg = "lightblue", cex = 2)
lines(x, yhat, col = "red", lwd = 2)


Adding squared terms

  • Adding squared terms makes it continuously differentiable at the knot points.
  • Adding cubic terms makes it twice continuously differentiable at the knot points; etcetera. \[ Y_i = \beta_0 + \beta_1 X_i + \beta_2 X_i^2 + \sum_{k=1}^d (x_i - \xi_k)_+^2 \gamma_k + \epsilon_{i} \]

# Quadratic version of the spline fit: squared hinge terms plus an x^2 column.
splineTerms <- vapply(
  knots,
  function(k) (x > k) * (x - k)^2,
  numeric(length(x))
)
xMat <- cbind(1, x, x^2, splineTerms)
yhat <- predict(lm(y ~ xMat - 1))
plot(x, y, frame = FALSE, pch = 21, bg = "lightblue", cex = 2)
lines(x, yhat, col = "red", lwd = 2)


Notes

  • The collection of regressors is called a basis.
  • People have spent a lot of time thinking about bases for this kind of problem. So, consider this as just a teaser.
  • Single knot point terms can fit hockey stick like processes.
  • These bases can be used in GLMs as well.
  • An issue with these approaches is the large number of parameters introduced.
  • Requires some method of so called regularization.

Harmonics using linear models

## Chord finder, playing the white keys on a piano from octave c4 - c5
notes4 <- c(261.63, 293.66, 329.63, 349.23, 392.00, 440.00, 493.88, 523.25)
# Two seconds of samples, one every millisecond.
t <- seq(0, 2, by = .001)
n <- length(t)
# Pure tones for the three notes of the chord (1st, 3rd and 5th entries).
c4 <- sin(2 * pi * notes4[1] * t)
e4 <- sin(2 * pi * notes4[3] * t)
g4 <- sin(2 * pi * notes4[5] * t)
# The observed signal: the three tones plus Gaussian noise.
chord <- c4 + e4 + g4 + rnorm(n, 0, 0.3)
# One regressor column per candidate note; the fit has no intercept.
x <- vapply(notes4, function(freq) sin(2 * pi * freq * t), numeric(n))
fit <- lm(chord ~ x - 1)

# Squared coefficient per note: notes present in the chord stand out.
plot(c(0, 9), c(0, 1.5), xlab = "Note", ylab = "Coef^2", axes = FALSE, frame = TRUE, type = "n")
axis(2)
axis(1, at = 1:8, labels = c("c4", "d4", "e4", "f4", "g4", "a4", "b4", "c5"))
abline(v = 1:8, lwd = 3, col = grey(.8))
lines(c(0, 1:8, 9), c(0, coef(fit)^2, 0), type = "l", lwd = 3, col = "red")


## (How you would really do it)
# Plot the squared modulus of the discrete Fourier transform. Using only the
# real part would drop whatever energy falls in the imaginary part.
a <- fft(chord); plot(Mod(a)^2, type = "l")


Practical Machine Learning


What is prediction?

The central dogma of prediction


Components of a predictor


question -> input data -> features -> algorithm -> parameters -> evaluation

SPAM Example


question -> input data -> features -> algorithm -> parameters -> evaluation


Start with a general question

Can I automatically detect which emails are SPAM and which are not?

Make it concrete

Can I use quantitative characteristics of the emails to classify them as SPAM/HAM?


SPAM Example


question -> input data -> features -> algorithm -> parameters -> evaluation

http://rss.acs.unt.edu/Rdoc/library/kernlab/html/spam.html


SPAM Example


question -> input data -> features -> algorithm -> parameters -> evaluation


Dear Jeff,

Can you send me your address so I can send you the invitation?

Thanks,

Ben


SPAM Example


question -> input data -> features -> algorithm -> parameters -> evaluation


Dear Jeff,

Can you send me your address so I can send you the invitation?

Thanks,

Ben


Frequency of you \(= 2/17 = 0.118\)


SPAM Example


question -> input data -> features -> algorithm -> parameters -> evaluation
library(kernlab)
## 
## Attaching package: 'kernlab'
## The following object is masked from 'package:ggplot2':
## 
##     alpha
# Load the spam data set and show its first rows.
data(spam)
head(spam)
##   make address  all num3d  our over remove internet order mail receive
## 1 0.00    0.64 0.64     0 0.32 0.00   0.00     0.00  0.00 0.00    0.00
## 2 0.21    0.28 0.50     0 0.14 0.28   0.21     0.07  0.00 0.94    0.21
## 3 0.06    0.00 0.71     0 1.23 0.19   0.19     0.12  0.64 0.25    0.38
## 4 0.00    0.00 0.00     0 0.63 0.00   0.31     0.63  0.31 0.63    0.31
## 5 0.00    0.00 0.00     0 0.63 0.00   0.31     0.63  0.31 0.63    0.31
## 6 0.00    0.00 0.00     0 1.85 0.00   0.00     1.85  0.00 0.00    0.00
##   will people report addresses free business email  you credit your font
## 1 0.64   0.00   0.00      0.00 0.32     0.00  1.29 1.93   0.00 0.96    0
## 2 0.79   0.65   0.21      0.14 0.14     0.07  0.28 3.47   0.00 1.59    0
## 3 0.45   0.12   0.00      1.75 0.06     0.06  1.03 1.36   0.32 0.51    0
## 4 0.31   0.31   0.00      0.00 0.31     0.00  0.00 3.18   0.00 0.31    0
## 5 0.31   0.31   0.00      0.00 0.31     0.00  0.00 3.18   0.00 0.31    0
## 6 0.00   0.00   0.00      0.00 0.00     0.00  0.00 0.00   0.00 0.00    0
##   num000 money hp hpl george num650 lab labs telnet num857 data num415
## 1   0.00  0.00  0   0      0      0   0    0      0      0    0      0
## 2   0.43  0.43  0   0      0      0   0    0      0      0    0      0
## 3   1.16  0.06  0   0      0      0   0    0      0      0    0      0
## 4   0.00  0.00  0   0      0      0   0    0      0      0    0      0
## 5   0.00  0.00  0   0      0      0   0    0      0      0    0      0
## 6   0.00  0.00  0   0      0      0   0    0      0      0    0      0
##   num85 technology num1999 parts pm direct cs meeting original project
## 1     0          0    0.00     0  0   0.00  0       0     0.00       0
## 2     0          0    0.07     0  0   0.00  0       0     0.00       0
## 3     0          0    0.00     0  0   0.06  0       0     0.12       0
## 4     0          0    0.00     0  0   0.00  0       0     0.00       0
## 5     0          0    0.00     0  0   0.00  0       0     0.00       0
## 6     0          0    0.00     0  0   0.00  0       0     0.00       0
##     re  edu table conference charSemicolon charRoundbracket
## 1 0.00 0.00     0          0          0.00            0.000
## 2 0.00 0.00     0          0          0.00            0.132
## 3 0.06 0.06     0          0          0.01            0.143
## 4 0.00 0.00     0          0          0.00            0.137
## 5 0.00 0.00     0          0          0.00            0.135
## 6 0.00 0.00     0          0          0.00            0.223
##   charSquarebracket charExclamation charDollar charHash capitalAve
## 1                 0           0.778      0.000    0.000      3.756
## 2                 0           0.372      0.180    0.048      5.114
## 3                 0           0.276      0.184    0.010      9.821
## 4                 0           0.137      0.000    0.000      3.537
## 5                 0           0.135      0.000    0.000      3.537
## 6                 0           0.000      0.000    0.000      3.000
##   capitalLong capitalTotal type
## 1          61          278 spam
## 2         101         1028 spam
## 3         485         2259 spam
## 4          40          191 spam
## 5          40          191 spam
## 6          15           54 spam

SPAM Example

question -> input data -> features -> algorithm -> parameters -> evaluation
# Density of the 'your' frequency by class: blue = nonspam, red = spam.
plot(density(with(spam, your[type == "nonspam"])),
     col = "blue", main = "", xlab = "Frequency of 'your'")
lines(density(with(spam, your[type == "spam"])), col = "red")


SPAM Example

question -> input data -> features -> algorithm -> parameters -> evaluation



Our algorithm

  • Find a value \(C\).
  • frequency of ‘your’ \(>\) C predict “spam”

SPAM Example

question -> input data -> features -> algorithm -> parameters -> evaluation
# Class-wise densities of 'your' (blue = nonspam, red = spam) with the
# candidate cutoff of 0.5 marked by a black vertical line.
plot(density(spam[spam$type == "nonspam", "your"]),
     col = "blue", main = "", xlab = "Frequency of 'your'")
lines(density(spam[spam$type == "spam", "your"]), col = "red")
abline(v = 0.5, col = "black")


SPAM Example

question -> input data -> features -> algorithm -> parameters -> evaluation
# Classify as spam when 'your' occurs with frequency above 0.5, then tabulate
# predictions against the true labels as proportions of all emails.
prediction <- ifelse(spam$your > 0.5, "spam", "nonspam")
table(prediction, spam$type) / nrow(spam)
##           
## prediction   nonspam      spam
##    nonspam 0.4590306 0.1017170
##    spam    0.1469246 0.2923278

Accuracy \(\approx 0.459 + 0.292 = 0.751\)


In sample and out of sample error

In sample versus out of sample

In Sample Error: The error rate you get on the same data set you used to build your predictor. Sometimes called resubstitution error.

Out of Sample Error: The error rate you get on a new data set. Sometimes called generalization error.

Key ideas

  1. Out of sample error is what you care about
  2. In sample error \(<\) out of sample error
  3. The reason is overfitting
  • Matching your algorithm to the data you have

In sample versus out of sample errors

library(kernlab)
data(spam)
set.seed(333)
# Draw 10 emails at random and plot their capitalAve values, coloured by class
# (colour 1 = nonspam, colour 2 = spam).
smallSpam <- spam[sample(nrow(spam), size = 10), ]
spamLabel <- as.numeric(smallSpam$type == "spam") + 1
plot(smallSpam$capitalAve, col = spamLabel)


Prediction rule 1

  • capitalAve \(>\) 2.7 = “spam”
  • capitalAve \(<\) 2.40 = “nonspam”
  • capitalAve between 2.40 and 2.45 = “spam”
  • capitalAve between 2.45 and 2.7 = “nonspam”

Apply Rule 1 to smallSpam

# Prediction rule 1: label each capitalAve value "spam" or "nonspam" using
# four hand-picked intervals.
#
# Args:
#   x: numeric vector of capitalAve values.
#
# Returns:
#   A vector the same length as x holding "spam" or "nonspam"; entries that
#   match no interval (e.g. NA inputs) stay NA.
rule1 <- function(x) {
  # "spam": above 2.7, or inside the narrow band [2.40, 2.45].
  spam_mask <- x > 2.7 | (x >= 2.40 & x <= 2.45)
  # "nonspam": below 2.40, or in (2.45, 2.70].
  ham_mask <- x < 2.40 | (x > 2.45 & x <= 2.70)
  labels <- rep(NA, length(x))
  labels[spam_mask] <- "spam"
  labels[ham_mask] <- "nonspam"
  labels
}
# Cross-tabulate rule 1's predictions (rows) against the true labels (columns)
# on the 10-email sample.
table(rule1(smallSpam$capitalAve),smallSpam$type)
##          
##           nonspam spam
##   nonspam       5    0
##   spam          0    5

Prediction rule 2

  • capitalAve \(>\) 2.80 = “spam”
  • capitalAve \(\leq\) 2.80 = “nonspam”

Apply Rule 2 to smallSpam

# Prediction rule 2: a single cutoff on capitalAve.
#
# Args:
#   x: numeric vector of capitalAve values.
#
# Returns:
#   A vector the same length as x: "spam" where x > 2.8, "nonspam" where
#   x <= 2.8, and NA where x is NA.
rule2 <- function(x) {
  cutoff <- 2.8
  labels <- rep(NA, length(x))
  labels[x <= cutoff] <- "nonspam"
  labels[x > cutoff] <- "spam"
  labels
}
# Cross-tabulate rule 2's predictions (rows) against the true labels (columns)
# on the 10-email sample.
table(rule2(smallSpam$capitalAve),smallSpam$type)
##          
##           nonspam spam
##   nonspam       5    1
##   spam          0    4

Apply to complete spam data

# Rule 1 applied to the complete spam data: predictions (rows) vs. truth.
table(rule1(spam$capitalAve),spam$type)
##          
##           nonspam spam
##   nonspam    2141  588
##   spam        647 1225
# Rule 2 applied to the complete spam data: predictions (rows) vs. truth.
table(rule2(spam$capitalAve),spam$type)
##          
##           nonspam spam
##   nonspam    2224  642
##   spam        564 1171
# Proportion of emails in the complete data that each rule labels correctly.
mean(rule1(spam$capitalAve)==spam$type)
## [1] 0.7315801
mean(rule2(spam$capitalAve)==spam$type)
## [1] 0.7378831

Look at accuracy

# Number of emails in the complete data that each rule labels correctly.
sum(rule1(spam$capitalAve)==spam$type)
## [1] 3366
sum(rule2(spam$capitalAve)==spam$type)
## [1] 3395

What’s going on?

Overfitting
  • Data have two parts
  • Signal
  • Noise
  • The goal of a predictor is to find signal
  • You can always design a perfect in-sample predictor
  • You capture both signal + noise when you do that
  • Predictor won’t perform as well on new samples

http://en.wikipedia.org/wiki/Overfitting


Prediction study design

Prediction study design

  1. Define your error rate
  2. Split data into:
  • Training, Testing, Validation (optional)
  1. On the training set pick features
  • Use cross-validation
  1. On the training set pick prediction function
  • Use cross-validation
  1. If no validation
  • Apply 1x to test set
  1. If validation
  • Apply to test set and refine
  • Apply 1x to validation

Avoid small sample sizes

  • Suppose you are predicting a binary outcome
  • Diseased/healthy
  • Click on ad/not click on ad
  • One classifier is flipping a coin
  • Probability of perfect classification is approximately:
  • \(\left(\frac{1}{2}\right)^{sample \; size}\)
  • \(n = 1\) flipping coin 50% chance of 100% accuracy
  • \(n = 2\) flipping coin 25% chance of 100% accuracy
  • \(n = 10\) flipping coin 0.10% chance of 100% accuracy

Rules of thumb for prediction study design

  • If you have a large sample size
  • 60% training
  • 20% test
  • 20% validation
  • If you have a medium sample size
  • 60% training
  • 40% testing
  • If you have a small sample size
  • Do cross validation
  • Report caveat of small sample size

Some principles to remember

  • Set the test/validation set aside and don’t look at it
  • In general randomly sample training and test
  • Your data sets must reflect structure of the problem
  • If predictions evolve with time, split train/test in time chunks (called backtesting in finance)
  • All subsets should reflect as much diversity as possible
  • Random assignment does this
  • You can also try to balance by features - but this is tricky

Types of errors

Basic terms

In general, Positive = identified and negative = rejected. Therefore:

True positive = correctly identified

False positive = incorrectly identified

True negative = correctly rejected

False negative = incorrectly rejected

Medical testing example:

True positive = Sick people correctly diagnosed as sick

False positive= Healthy people incorrectly identified as sick

True negative = Healthy people correctly identified as healthy

False negative = Sick people incorrectly identified as healthy.

http://en.wikipedia.org/wiki/Sensitivity_and_specificity


For continuous data

Mean squared error (MSE):

\[\frac{1}{n} \sum_{i=1}^n (Prediction_i - Truth_i)^2\]

Root mean squared error (RMSE):

\[\sqrt{\frac{1}{n} \sum_{i=1}^n(Prediction_i - Truth_i)^2}\]


Common error measures

  1. Mean squared error (or root mean squared error)
  • Continuous data, sensitive to outliers
  1. Median absolute deviation
  • Continuous data, often more robust
  1. Sensitivity (recall)
  • If you want few missed positives
  1. Specificity
  • If you want few negatives called positives
  1. Accuracy
  • Weights false positives/negatives equally
  1. Concordance
  1. Predictive value of a positive (precision)
  • When you are screening and prevalence is low

ROC curves

Why a curve?

  • In binary classification you are predicting one of two categories
  • Alive/dead
  • Click on ad/don’t click
  • But your predictions are often quantitative
  • Probability of being alive
  • Prediction on a scale from 1 to 10
  • The cutoff you choose gives different results

Area under the curve

  • AUC = 0.5: random guessing
  • AUC = 1: perfect classifier
  • In general AUC of above 0.8 considered “good”

http://en.wikipedia.org/wiki/Receiver_operating_characteristic


Cross validation

Key idea

  1. Accuracy on the training set (resubstitution accuracy) is optimistic
  2. A better estimate comes from an independent set (test set accuracy)
  3. But we can’t use the test set when building the model or it becomes part of the training set
  4. So we estimate the test set accuracy with the training set.

Cross-validation

Approach:

  1. Use the training set

  2. Split it into training/test sets

  3. Build a model on the training set

  4. Evaluate on the test set

  5. Repeat and average the estimated errors

Used for:

  1. Picking variables to include in a model

  2. Picking the type of prediction function to use

  3. Picking the parameters in the prediction function

  4. Comparing different predictors


Random subsampling


K-fold


Leave one out


Considerations

  • For time series data, the data must be used in “chunks”
  • For k-fold cross validation
  • Larger k = less bias, more variance
  • Smaller k = more bias, less variance
  • Random sampling must be done without replacement
  • Random sampling with replacement is the bootstrap
  • Underestimates of the error
  • Can be corrected, but it is complicated (0.632 Bootstrap)
  • If you cross-validate to pick predictors, you must estimate errors on independent data.

The caret package

Caret functionality

  • Some preprocessing (cleaning)
  • preProcess
  • Data splitting
  • createDataPartition
  • createResample
  • createTimeSlices
  • Training/testing functions
  • train
  • predict
  • Model comparison
  • confusionMatrix

Machine learning algorithms in R

  • Linear discriminant analysis
  • Regression
  • Naive Bayes
  • Support vector machines
  • Classification and regression trees
  • Random forests
  • Boosting
  • etc.

SPAM Example: Data splitting

library(caret); library(kernlab); data(spam)
# createDataPartition draws a random split, so seed the RNG first to make the
# train/test partition reproducible between runs.
set.seed(32343)
# Put 75% of the rows in the training set; list = FALSE returns the row
# indices as a matrix usable for subsetting.
inTrain <- createDataPartition(y = spam$type,
                               p = 0.75, list = FALSE)
training1 <- spam[inTrain, ]
testing <- spam[-inTrain, ]
dim(training1)
## [1] 3451   58

SPAM Example: Fit a model

# Seed the RNG, then fit a GLM predicting `type` from every other column of
# the training set via caret's train(), and print the resampling summary.
set.seed(32343)
modelFit <- train(type ~.,data=training1, method="glm")
modelFit
## Generalized Linear Model 
## 
## 3451 samples
##   57 predictor
##    2 classes: 'nonspam', 'spam' 
## 
## No pre-processing
## Resampling: Bootstrapped (25 reps) 
## Summary of sample sizes: 3451, 3451, 3451, 3451, 3451, 3451, ... 
## Resampling results
## 
##   Accuracy   Kappa      Accuracy SD  Kappa SD  
##   0.9160009  0.8231214  0.005584604  0.01184724
## 
## 

SPAM Example: Final model

# Same train() call as before, repeated here without re-seeding, followed by
# the underlying fitted model (coefficients and deviances).
modelFit <- train(type ~.,data=training1, method="glm")
modelFit$finalModel
## 
## Call:  NULL
## 
## Coefficients:
##       (Intercept)               make            address  
##        -1.438e+00         -1.839e-01         -1.489e-01  
##               all              num3d                our  
##         1.175e-01          2.480e+00          5.420e-01  
##              over             remove           internet  
##         9.038e-01          2.457e+00          4.973e-01  
##             order               mail            receive  
##         5.102e-01          1.053e-01         -5.919e-01  
##              will             people             report  
##        -1.932e-01         -2.159e-01          2.981e-01  
##         addresses               free           business  
##         8.834e-01          8.883e-01          9.754e-01  
##             email                you             credit  
##         1.735e-01          7.511e-02          9.960e-01  
##              your               font             num000  
##         2.553e-01          1.483e-01          1.911e+00  
##             money                 hp                hpl  
##         6.091e-01         -1.837e+00         -8.798e-01  
##            george             num650                lab  
##        -1.179e+01          4.687e-01         -2.362e+00  
##              labs             telnet             num857  
##        -3.192e-01         -1.544e-01          1.026e+00  
##              data             num415              num85  
##        -8.858e-01          5.891e-01         -2.025e+00  
##        technology            num1999              parts  
##         9.146e-01          3.811e-02          4.856e-01  
##                pm             direct                 cs  
##        -8.030e-01         -4.246e-01         -5.553e+02  
##           meeting           original            project  
##        -2.624e+00         -1.211e+00         -2.089e+00  
##                re                edu              table  
##        -7.711e-01         -1.383e+00         -2.202e+00  
##        conference      charSemicolon   charRoundbracket  
##        -3.981e+00         -1.174e+00         -1.180e-01  
## charSquarebracket    charExclamation         charDollar  
##        -4.938e-01          2.642e-01          5.037e+00  
##          charHash         capitalAve        capitalLong  
##         2.437e+00          3.563e-03          1.021e-02  
##      capitalTotal  
##         8.545e-04  
## 
## Degrees of Freedom: 3450 Total (i.e. Null);  3393 Residual
## Null Deviance:       4628 
## Residual Deviance: 1408  AIC: 1524

SPAM Example: Prediction

# Predict the class of every email in the held-out testing set and print the
# resulting factor.
predictions <- predict(modelFit,newdata=testing)
predictions
##    [1] spam    spam    spam    spam    nonspam spam    spam    spam   
##    [9] nonspam spam    spam    spam    spam    spam    spam    spam   
##   [17] spam    spam    spam    spam    spam    spam    spam    spam   
##   [25] spam    spam    spam    spam    spam    spam    spam    spam   
##   [33] spam    spam    spam    spam    spam    spam    spam    spam   
##   [41] spam    spam    spam    nonspam spam    nonspam spam    spam   
##   [49] nonspam spam    spam    nonspam spam    spam    spam    spam   
##   [57] spam    spam    spam    spam    nonspam spam    nonspam spam   
##   [65] spam    spam    spam    spam    spam    spam    spam    spam   
##   [73] spam    spam    spam    spam    spam    nonspam nonspam spam   
##   [81] nonspam spam    spam    nonspam spam    spam    spam    spam   
##   [89] spam    spam    spam    nonspam spam    spam    spam    spam   
##   [97] spam    spam    spam    nonspam spam    spam    nonspam spam   
##  [105] spam    spam    spam    spam    spam    spam    spam    spam   
##  [113] spam    spam    spam    spam    spam    spam    spam    spam   
##  [121] spam    spam    spam    spam    spam    spam    nonspam spam   
##  [129] spam    spam    spam    spam    spam    spam    spam    spam   
##  [137] spam    spam    spam    spam    spam    spam    spam    spam   
##  [145] spam    spam    spam    spam    spam    spam    spam    spam   
##  [153] nonspam spam    spam    spam    spam    spam    spam    spam   
##  [161] nonspam spam    spam    nonspam spam    spam    spam    spam   
##  [169] spam    spam    spam    spam    spam    spam    spam    spam   
##  [177] spam    spam    spam    spam    nonspam spam    spam    spam   
##  [185] spam    spam    spam    spam    spam    spam    spam    spam   
##  [193] spam    spam    spam    nonspam spam    spam    spam    spam   
##  [201] spam    spam    spam    spam    spam    spam    spam    spam   
##  [209] spam    spam    spam    spam    spam    spam    spam    spam   
##  [217] spam    spam    spam    spam    spam    nonspam spam    spam   
##  [225] spam    spam    spam    spam    spam    spam    spam    spam   
##  [233] spam    spam    spam    spam    spam    spam    spam    spam   
##  [241] spam    spam    spam    nonspam spam    spam    spam    spam   
##  [249] spam    spam    spam    spam    spam    spam    spam    spam   
##  [257] spam    spam    spam    spam    spam    spam    spam    spam   
##  [265] spam    spam    spam    spam    spam    spam    spam    spam   
##  [273] spam    spam    spam    spam    spam    spam    spam    spam   
##  [281] spam    spam    spam    spam    spam    spam    spam    spam   
##  [289] spam    nonspam spam    spam    spam    spam    spam    spam   
##  [297] nonspam spam    spam    spam    spam    spam    spam    spam   
##  [305] spam    spam    spam    spam    spam    spam    spam    spam   
##  [313] spam    spam    spam    spam    spam    spam    spam    spam   
##  [321] spam    spam    spam    spam    spam    spam    spam    spam   
##  [329] spam    spam    spam    nonspam spam    spam    spam    spam   
##  [337] spam    spam    spam    spam    spam    spam    spam    spam   
##  [345] spam    spam    spam    spam    nonspam spam    spam    spam   
##  [353] spam    spam    spam    spam    spam    spam    spam    spam   
##  [361] spam    nonspam spam    spam    spam    spam    spam    spam   
##  [369] spam    spam    spam    nonspam spam    spam    spam    spam   
##  [377] spam    spam    spam    spam    spam    spam    spam    spam   
##  [385] spam    spam    spam    spam    spam    spam    spam    nonspam
##  [393] spam    spam    spam    spam    spam    spam    spam    spam   
##  [401] spam    spam    spam    spam    nonspam spam    nonspam spam   
##  [409] spam    spam    spam    spam    spam    spam    spam    spam   
##  [417] spam    spam    nonspam nonspam spam    spam    spam    nonspam
##  [425] spam    spam    spam    spam    spam    spam    spam    spam   
##  [433] spam    spam    spam    spam    spam    spam    spam    spam   
##  [441] spam    spam    nonspam spam    spam    spam    spam    spam   
##  [449] spam    spam    spam    spam    spam    nonspam nonspam nonspam
##  [457] nonspam nonspam nonspam nonspam nonspam nonspam nonspam nonspam
##  [465] nonspam nonspam nonspam nonspam nonspam nonspam nonspam nonspam
##  [473] nonspam nonspam nonspam nonspam nonspam nonspam nonspam nonspam
##  [481] spam    nonspam nonspam nonspam nonspam nonspam nonspam nonspam
##  [489] nonspam nonspam nonspam nonspam nonspam nonspam nonspam nonspam
##  [497] nonspam nonspam nonspam nonspam nonspam nonspam nonspam nonspam
##  [505] nonspam nonspam nonspam nonspam nonspam nonspam nonspam nonspam
##  [513] nonspam nonspam nonspam nonspam nonspam nonspam spam    nonspam
##  [521] nonspam nonspam nonspam nonspam nonspam nonspam nonspam nonspam
##  [529] nonspam nonspam nonspam nonspam nonspam nonspam nonspam nonspam
##  [537] nonspam nonspam nonspam nonspam nonspam spam    nonspam nonspam
##  [545] nonspam nonspam nonspam nonspam nonspam nonspam nonspam nonspam
##  [553] nonspam nonspam nonspam nonspam nonspam nonspam nonspam nonspam
##  [561] nonspam nonspam nonspam nonspam nonspam nonspam nonspam spam   
##  [569] nonspam spam    spam    nonspam nonspam spam    nonspam nonspam
##  [577] spam    nonspam nonspam nonspam nonspam nonspam nonspam nonspam
##  [585] nonspam nonspam nonspam nonspam nonspam nonspam nonspam nonspam
##  [593] nonspam nonspam nonspam nonspam nonspam nonspam nonspam spam   
##  [601] nonspam nonspam nonspam nonspam nonspam nonspam nonspam nonspam
##  [609] nonspam spam    nonspam nonspam nonspam nonspam nonspam nonspam
##  [617] nonspam nonspam nonspam nonspam nonspam nonspam nonspam nonspam
##  [625] nonspam nonspam nonspam nonspam nonspam nonspam nonspam nonspam
##  [633] spam    nonspam nonspam nonspam nonspam nonspam nonspam nonspam
##  [641] nonspam nonspam nonspam spam    nonspam spam    nonspam nonspam
##  [649] nonspam nonspam nonspam nonspam nonspam nonspam nonspam nonspam
##  [657] nonspam nonspam nonspam nonspam nonspam nonspam nonspam nonspam
##  [665] spam    nonspam nonspam nonspam nonspam nonspam nonspam nonspam
##  [673] nonspam nonspam nonspam nonspam nonspam nonspam nonspam nonspam
##  [681] nonspam nonspam nonspam nonspam nonspam nonspam nonspam nonspam
##  [689] nonspam nonspam nonspam nonspam nonspam nonspam nonspam nonspam
##  [697] nonspam nonspam nonspam nonspam nonspam nonspam nonspam nonspam
##  [705] nonspam nonspam nonspam nonspam nonspam nonspam nonspam nonspam
##  [713] nonspam nonspam nonspam nonspam nonspam nonspam nonspam nonspam
##  [721] nonspam nonspam nonspam nonspam nonspam nonspam nonspam nonspam
##  [729] nonspam nonspam nonspam nonspam nonspam nonspam nonspam nonspam
##  [737] nonspam nonspam nonspam nonspam nonspam spam    nonspam nonspam
##  [745] nonspam nonspam nonspam nonspam nonspam nonspam nonspam nonspam
##  [753] nonspam nonspam nonspam nonspam nonspam nonspam nonspam nonspam
##  [761] nonspam nonspam nonspam nonspam nonspam nonspam nonspam nonspam
##  [769] nonspam nonspam nonspam nonspam nonspam nonspam nonspam nonspam
##  [777] nonspam nonspam nonspam spam    nonspam nonspam nonspam nonspam
##  [785] nonspam nonspam nonspam nonspam nonspam nonspam nonspam nonspam
##  [793] nonspam nonspam nonspam nonspam nonspam nonspam spam    spam   
##  [801] nonspam nonspam nonspam nonspam nonspam nonspam nonspam nonspam
##  [809] nonspam nonspam nonspam nonspam nonspam nonspam nonspam nonspam
##  [817] nonspam nonspam nonspam nonspam nonspam spam    nonspam nonspam
##  [825] nonspam nonspam spam    nonspam nonspam nonspam nonspam nonspam
##  [833] nonspam nonspam nonspam nonspam nonspam nonspam nonspam nonspam
##  [841] nonspam nonspam spam    spam    nonspam spam    nonspam nonspam
##  [849] nonspam nonspam nonspam nonspam nonspam nonspam nonspam nonspam
##  [857] nonspam nonspam nonspam nonspam nonspam nonspam nonspam nonspam
##  [865] nonspam spam    nonspam nonspam nonspam nonspam nonspam nonspam
##  [873] nonspam nonspam nonspam nonspam spam    spam    nonspam nonspam
##  [881] nonspam nonspam nonspam nonspam nonspam nonspam nonspam nonspam
##  [889] nonspam nonspam nonspam nonspam nonspam nonspam nonspam nonspam
##  [897] nonspam nonspam nonspam nonspam nonspam nonspam nonspam nonspam
##  [905] nonspam nonspam nonspam nonspam nonspam nonspam nonspam nonspam
##  [913] nonspam nonspam nonspam nonspam nonspam nonspam nonspam nonspam
##  [921] nonspam nonspam nonspam nonspam nonspam nonspam nonspam nonspam
##  [929] nonspam nonspam nonspam nonspam nonspam nonspam nonspam nonspam
##  [937] nonspam nonspam nonspam nonspam nonspam nonspam nonspam nonspam
##  [945] nonspam nonspam nonspam nonspam nonspam nonspam nonspam nonspam
##  [953] nonspam nonspam nonspam nonspam nonspam nonspam nonspam nonspam
##  [961] spam    nonspam nonspam nonspam nonspam nonspam nonspam nonspam
##  [969] spam    nonspam nonspam nonspam nonspam nonspam nonspam nonspam
##  [977] nonspam nonspam nonspam nonspam nonspam nonspam nonspam nonspam
##  [985] nonspam nonspam nonspam nonspam nonspam nonspam nonspam nonspam
##  [993] nonspam nonspam nonspam nonspam nonspam nonspam nonspam nonspam
## [1001] nonspam nonspam nonspam nonspam nonspam nonspam nonspam nonspam
## [1009] nonspam nonspam nonspam nonspam nonspam nonspam nonspam nonspam
## [1017] spam    spam    nonspam nonspam nonspam nonspam nonspam nonspam
## [1025] nonspam nonspam nonspam nonspam nonspam nonspam nonspam nonspam
## [1033] nonspam nonspam nonspam nonspam nonspam nonspam nonspam nonspam
## [1041] nonspam nonspam nonspam spam    nonspam nonspam nonspam nonspam
## [1049] nonspam nonspam nonspam nonspam spam    nonspam nonspam nonspam
## [1057] nonspam nonspam nonspam nonspam nonspam nonspam nonspam nonspam
## [1065] nonspam nonspam nonspam nonspam nonspam nonspam nonspam spam   
## [1073] nonspam nonspam nonspam nonspam nonspam nonspam nonspam nonspam
## [1081] spam    nonspam nonspam nonspam nonspam nonspam nonspam spam   
## [1089] nonspam nonspam nonspam nonspam nonspam nonspam nonspam spam   
## [1097] spam    spam    nonspam nonspam nonspam nonspam nonspam nonspam
## [1105] nonspam nonspam nonspam nonspam nonspam nonspam nonspam nonspam
## [1113] nonspam nonspam nonspam nonspam nonspam nonspam nonspam nonspam
## [1121] nonspam nonspam nonspam nonspam nonspam nonspam nonspam nonspam
## [1129] nonspam nonspam nonspam nonspam nonspam nonspam nonspam nonspam
## [1137] nonspam nonspam nonspam nonspam nonspam nonspam nonspam nonspam
## [1145] nonspam nonspam nonspam nonspam nonspam nonspam
## Levels: nonspam spam

SPAM Example: Confusion Matrix

# Tabulate the predicted classes against the observed `testing$type` labels.
confusionMatrix(data = predictions, reference = testing$type)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction nonspam spam
##    nonspam     659   36
##    spam         38  417
##                                           
##                Accuracy : 0.9357          
##                  95% CI : (0.9199, 0.9491)
##     No Information Rate : 0.6061          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.8653          
##  Mcnemar's Test P-Value : 0.9075          
##                                           
##             Sensitivity : 0.9455          
##             Specificity : 0.9205          
##          Pos Pred Value : 0.9482          
##          Neg Pred Value : 0.9165          
##              Prevalence : 0.6061          
##          Detection Rate : 0.5730          
##    Detection Prevalence : 0.6043          
##       Balanced Accuracy : 0.9330          
##                                           
##        'Positive' Class : nonspam         
## 

Data slicing

SPAM Example: Data splitting

# Attach caret and kernlab, then load the spam data set.
library(caret)
library(kernlab)
data(spam)

# Row indices for the training portion (p = 0.75 of the rows).
inTrain <- createDataPartition(y = spam$type, p = 0.75, list = FALSE)

# Rows picked by inTrain form the training set; the remaining rows are the
# test set.
training2 <- spam[inTrain, ]
testing <- spam[-inTrain, ]
dim(training2)
## [1] 3451   58

SPAM Example: K-fold

set.seed(32323)
# k = 10 folds with returnTrain = TRUE: the sizes printed below are roughly
# nine tenths of the 4601 rows.
folds <- createFolds(y = spam$type, k = 10, list = TRUE, returnTrain = TRUE)
lengths(folds)
## Fold01 Fold02 Fold03 Fold04 Fold05 Fold06 Fold07 Fold08 Fold09 Fold10 
##   4141   4140   4141   4142   4140   4142   4141   4141   4140   4141
# First ten indices of the first fold.
head(folds[[1]], 10)
##  [1]  1  2  3  4  5  6  7  8  9 10

SPAM Example: Return test

set.seed(32323)
# k = 10 folds with returnTrain = FALSE: the sizes printed below are roughly
# one tenth of the 4601 rows.
folds <- createFolds(y = spam$type, k = 10, list = TRUE, returnTrain = FALSE)
lengths(folds)
## Fold01 Fold02 Fold03 Fold04 Fold05 Fold06 Fold07 Fold08 Fold09 Fold10 
##    460    461    460    459    461    459    460    460    461    460
# First ten indices of the first fold.
head(folds[[1]], 10)
##  [1] 24 27 32 40 41 43 55 58 63 68

SPAM Example: Resampling

set.seed(32323)
# Draw times = 10 resamples; each one printed below has 4601 entries.
folds <- createResample(y = spam$type, times = 10, list = TRUE)
lengths(folds)
## Resample01 Resample02 Resample03 Resample04 Resample05 Resample06 
##       4601       4601       4601       4601       4601       4601 
## Resample07 Resample08 Resample09 Resample10 
##       4601       4601       4601       4601
# First ten indices of the first resample (indices may repeat).
head(folds[[1]], 10)
##  [1]  1  2  3  3  3  5  5  7  8 12

SPAM Example: Time Slices

set.seed(32323)
tme <- seq_len(1000)
# Training windows of 20 consecutive indices, each paired with a test
# horizon of the next 10 indices.
folds <- createTimeSlices(y = tme, initialWindow = 20, horizon = 10)
names(folds)
## [1] "train" "test"
folds[["train"]][[1]]
##  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20
folds[["test"]][[1]]
##  [1] 21 22 23 24 25 26 27 28 29 30

Training options

SPAM Example

library(caret)
library(kernlab)
data(spam)

inTrain <- createDataPartition(y = spam$type, p = 0.75, list = FALSE)
training3 <- spam[inTrain, ]
testing <- spam[-inTrain, ]

# Model `type` on all other columns with method = "glm"; every other train()
# option is left at its default.
modelFit <- train(type ~ ., data = training3, method = "glm")
modelFit
## Generalized Linear Model 
## 
## 3451 samples
##   57 predictor
##    2 classes: 'nonspam', 'spam' 
## 
## No pre-processing
## Resampling: Bootstrapped (25 reps) 
## Summary of sample sizes: 3451, 3451, 3451, 3451, 3451, 3451, ... 
## Resampling results
## 
##   Accuracy   Kappa      Accuracy SD  Kappa SD  
##   0.9167223  0.8245611  0.01443204   0.03161115
## 
## 

Train options

# Show the argument list (with defaults) of train.default.
args(train.default)
## function (x, y, method = "rf", preProcess = NULL, ..., weights = NULL, 
##     metric = ifelse(is.factor(y), "Accuracy", "RMSE"), maximize = ifelse(metric %in% 
##         c("RMSE", "logLoss"), FALSE, TRUE), trControl = trainControl(), 
##     tuneGrid = NULL, tuneLength = 3) 
## NULL

Metric options

Continuous outcomes: * RMSE = Root mean squared error * Rsquared = \(R^2\) from regression models

Categorical outcomes: * Accuracy = Fraction correct * Kappa = A measure of concordance


trainControl

# Show the argument list (with defaults) of trainControl.
args(trainControl)
## function (method = "boot", number = ifelse(grepl("cv", method), 
##     10, 25), repeats = ifelse(grepl("cv", method), 1, number), 
##     p = 0.75, search = "grid", initialWindow = NULL, horizon = 1, 
##     fixedWindow = TRUE, verboseIter = FALSE, returnData = TRUE, 
##     returnResamp = "final", savePredictions = FALSE, classProbs = FALSE, 
##     summaryFunction = defaultSummary, selectionFunction = "best", 
##     preProcOptions = list(thresh = 0.95, ICAcomp = 3, k = 5), 
##     sampling = NULL, index = NULL, indexOut = NULL, timingSamps = 0, 
##     predictionBounds = rep(FALSE, 2), seeds = NA, adaptive = list(min = 5, 
##         alpha = 0.05, method = "gls", complete = TRUE), trim = FALSE, 
##     allowParallel = TRUE) 
## NULL

trainControl resampling

  • method
  • boot = bootstrapping
  • boot632 = bootstrapping with adjustment
  • cv = cross validation
  • repeatedcv = repeated cross validation
  • LOOCV = leave one out cross validation
  • number
  • For boot/cross validation
  • Number of subsamples to take
  • repeats
  • Number of times to repeat subsampling
  • If big this can slow things down

Setting the seed

  • It is often useful to set an overall seed
  • You can also set a seed for each resample
  • Seeding each resample is useful for parallel fits

seed example

# Fix the RNG state immediately before fitting.
set.seed(1235)
modelFit2 <- train(type ~ ., data = training3, method = "glm")
modelFit2
## Generalized Linear Model 
## 
## 3451 samples
##   57 predictor
##    2 classes: 'nonspam', 'spam' 
## 
## No pre-processing
## Resampling: Bootstrapped (25 reps) 
## Summary of sample sizes: 3451, 3451, 3451, 3451, 3451, 3451, ... 
## Resampling results
## 
##   Accuracy   Kappa      Accuracy SD  Kappa SD  
##   0.9152739  0.8213541  0.01948848   0.04382679
## 
## 

seed example

# Same seed and same call as the modelFit2 example: the resampling results
# printed below are identical to that fit's.
set.seed(1235)
modelFit3 <- train(type ~ ., data = training3, method = "glm")
modelFit3
## Generalized Linear Model 
## 
## 3451 samples
##   57 predictor
##    2 classes: 'nonspam', 'spam' 
## 
## No pre-processing
## Resampling: Bootstrapped (25 reps) 
## Summary of sample sizes: 3451, 3451, 3451, 3451, 3451, 3451, ... 
## Resampling results
## 
##   Accuracy   Kappa      Accuracy SD  Kappa SD  
##   0.9152739  0.8213541  0.01948848   0.04382679
## 
## 

Plotting predictors

Example: predicting wages

Image Credit http://www.cahs-media.org/the-high-cost-of-low-wages

Data from: ISLR package from the book: Introduction to statistical learning


Example: Wage data

# Attach the packages used by the Wage examples, then load the data set.
library(ISLR)
library(ggplot2)
library(caret)
library(Hmisc)
library(gridExtra)
data(Wage)
# Per-column overview of the data.
summary(Wage)
##       year           age               sex                    maritl    
##  Min.   :2003   Min.   :18.00   1. Male  :3000   1. Never Married: 648  
##  1st Qu.:2004   1st Qu.:33.75   2. Female:   0   2. Married      :2074  
##  Median :2006   Median :42.00                    3. Widowed      :  19  
##  Mean   :2006   Mean   :42.41                    4. Divorced     : 204  
##  3rd Qu.:2008   3rd Qu.:51.00                    5. Separated    :  55  
##  Max.   :2009   Max.   :80.00                                           
##                                                                         
##        race                   education                     region    
##  1. White:2480   1. < HS Grad      :268   2. Middle Atlantic   :3000  
##  2. Black: 293   2. HS Grad        :971   1. New England       :   0  
##  3. Asian: 190   3. Some College   :650   3. East North Central:   0  
##  4. Other:  37   4. College Grad   :685   4. West North Central:   0  
##                  5. Advanced Degree:426   5. South Atlantic    :   0  
##                                           6. East South Central:   0  
##                                           (Other)              :   0  
##            jobclass               health      health_ins      logwage     
##  1. Industrial :1544   1. <=Good     : 858   1. Yes:2083   Min.   :3.000  
##  2. Information:1456   2. >=Very Good:2142   2. No : 917   1st Qu.:4.447  
##                                                            Median :4.653  
##                                                            Mean   :4.654  
##                                                            3rd Qu.:4.857  
##                                                            Max.   :5.763  
##                                                                           
##       wage       
##  Min.   : 20.09  
##  1st Qu.: 85.38  
##  Median :104.92  
##  Mean   :111.70  
##  3rd Qu.:128.68  
##  Max.   :318.34  
## 

Get training/test sets

# p = 0.7 of the rows go to training4; the remaining rows go to testing.
inTrain <- createDataPartition(y = Wage$wage, p = 0.7, list = FALSE)
training4 <- Wage[inTrain, ]
testing <- Wage[-inTrain, ]
dim(training4)
dim(testing)
## [1] 2102   12
## [1] 898  12

Feature plot (caret package)

# Pairs plot of three predictors together with the wage outcome.
featurePlot(
  x = training4[, c("age", "education", "jobclass")],
  y = training4$wage,
  plot = "pairs"
)


Qplot (ggplot2 package)

# Plot wage against age for the training rows.
# NOTE(review): qplot() is deprecated in recent ggplot2 releases -- confirm
# against the installed version before reusing this snippet.
qplot(x = age, y = wage, data = training4)


Qplot with color (ggplot2 package)

# Wage against age, coloured by job class.
qplot(x = age, y = wage, colour = jobclass, data = training4)


Add regression smoothers (ggplot2 package)

# Colour by education, then add linear-model (lm) smoothers on top.
qq <- qplot(x = age, y = wage, colour = education, data = training4)
qq + geom_smooth(method = "lm", formula = y ~ x)


cut2, making factors (Hmisc package)

# Bin wage into g = 3 groups and count the observations in each bin.
cutWage <- cut2(training4$wage, g = 3)
table(cutWage)
## cutWage
## [ 23.0, 91.7) [ 91.7,118.9) [118.9,318.3] 
##           701           735           666

Boxplots with cut2 and with points overlaid

# Boxplot of age within each wage group.
p1 <- qplot(
  x = cutWage, y = age, data = training4,
  fill = cutWage, geom = "boxplot"
)
p1

library(gridExtra)
# Same boxplot with jittered points added, shown next to p1 in two columns.
p2 <- qplot(
  x = cutWage, y = age, data = training4,
  fill = cutWage, geom = c("boxplot", "jitter")
)
grid.arrange(p1, p2, ncol = 2)


Tables

# Cross-tabulate wage group (rows) against job class (columns).
t1 <- table(cutWage, training4$jobclass)
t1
##                
## cutWage         1. Industrial 2. Information
##   [ 23.0, 91.7)           445            256
##   [ 91.7,118.9)           378            357
##   [118.9,318.3]           274            392
# Proportions within each row (margin = 1), so every row sums to 1.
prop.table(t1, margin = 1)
##                
## cutWage         1. Industrial 2. Information
##   [ 23.0, 91.7)     0.6348074      0.3651926
##   [ 91.7,118.9)     0.5142857      0.4857143
##   [118.9,318.3]     0.4114114      0.5885886

Density plots

# Density of wage, coloured by education level.
qplot(x = wage, colour = education, data = training4, geom = "density")


Notes and further reading

  • Make your plots only in the training set
  • Don’t use the test set for exploration!
  • Things you should be looking for
  • Imbalance in outcomes/predictors
  • Outliers
  • Groups of points not explained by a predictor
  • Skewed variables
  • ggplot2 tutorial
  • caret visualizations

Preprocessing

Why preprocess?

library(caret)
library(RANN)
library(kernlab)
data(spam)

inTrain <- createDataPartition(y = spam$type, p = 0.75, list = FALSE)
training <- spam[inTrain, ]
testing <- spam[-inTrain, ]

# Histogram of capitalAve in the training rows (empty title, custom x label).
hist(training$capitalAve, main = "", xlab = "ave. capital run length")


Why preprocess?

# The standard deviation printed below is several times larger than the mean.
mean(training[["capitalAve"]])
## [1] 5.063805
sd(training[["capitalAve"]])
## [1] 29.47083

Standardizing

trainCapAve <- training$capitalAve
# Subtract the mean, then divide by the standard deviation.
trainCapAveS <- (trainCapAve - mean(trainCapAve)) / sd(trainCapAve)
mean(trainCapAveS)
## [1] -6.930961e-18
sd(trainCapAveS)
## [1] 1

Standardizing - test set

testCapAve <- testing$capitalAve
# The test values are standardized with the TRAINING mean and sd, so the
# results below are only approximately 0 and 1.
testCapAveS <- (testCapAve - mean(trainCapAve)) / sd(trainCapAve)
mean(testCapAveS)
## [1] 0.0173375
sd(testCapAveS)
## [1] 1.279768

Standardizing - preProcess function

# Learn centering and scaling from every column except column 58, then apply
# the result to those same training columns.
preObj <- preProcess(training[, -58], method = c("center", "scale"))
trainCapAveS <- predict(preObj, training[, -58])$capitalAve
mean(trainCapAveS)
## [1] -6.930961e-18
sd(trainCapAveS)
## [1] 1

Standardizing - preProcess function

# Apply the existing preObj to the test columns (column 58 dropped).
testCapAveS <- predict(preObj, testing[, -58])$capitalAve
mean(testCapAveS)
## [1] 0.0173375
sd(testCapAveS)
## [1] 1.279768

Standardizing - preProcess argument

set.seed(32343)
# Hand the centering/scaling steps to train() through its preProcess argument.
modelFit <- train(
  type ~ .,
  data = training,
  preProcess = c("center", "scale"),
  method = "glm"
)
modelFit
## Generalized Linear Model 
## 
## 3451 samples
##   57 predictor
##    2 classes: 'nonspam', 'spam' 
## 
## Pre-processing: centered (57), scaled (57) 
## Resampling: Bootstrapped (25 reps) 
## Summary of sample sizes: 3451, 3451, 3451, 3451, 3451, 3451, ... 
## Resampling results
## 
##   Accuracy   Kappa      Accuracy SD  Kappa SD  
##   0.9187021  0.8295411  0.01319563   0.02493402
## 
## 

Standardizing - Box-Cox transforms

# Box-Cox transform learned from every training column except column 58.
preObj <- preProcess(training[, -58], method = c("BoxCox"))
trainCapAveS <- predict(preObj, training[, -58])$capitalAve

# Draw the histogram and the normal Q-Q plot side by side. par() returns the
# previous settings, which are restored afterwards so the 1 x 2 layout does
# not leak into later plots in the same session.
oldPar <- par(mfrow = c(1, 2))
hist(trainCapAveS)
qqnorm(trainCapAveS)
par(oldPar)


Standardizing - Imputing data

set.seed(13343)

# Copy capitalAve and set a random subset of the copy to NA
# (each row is selected with probability 0.05)
training$capAve <- training$capitalAve
selectNA <- rbinom(nrow(training), size = 1, prob = 0.05) == 1
training$capAve[selectNA] <- NA

# Run knnImpute on every column except column 58 and keep the resulting capAve
preObj <- preProcess(training[, -58], method = "knnImpute")
capAve <- predict(preObj, training[, -58])$capAve

# Standardize the untouched capitalAve values for comparison
capAveTruth <- training$capitalAve
capAveTruth <- (capAveTruth - mean(capAveTruth)) / sd(capAveTruth)

Standardizing - Imputing data

# Difference between imputed/standardized and true standardized values:
# first over all rows, then over the rows set to NA, then over the rest.
quantile(capAve - capAveTruth)
##           0%          25%          50%          75%         100% 
## -1.953977081  0.001102328  0.001668703  0.001947075  0.135752952
quantile(capAve[selectNA] - capAveTruth[selectNA])
##          0%         25%         50%         75%        100% 
## -1.95397708 -0.01708111  0.00166803  0.01725343  0.13575295
quantile(capAve[!selectNA] - capAveTruth[!selectNA])
##           0%          25%          50%          75%         100% 
## -0.383298294  0.001135378  0.001668892  0.001930456  0.002154060

Notes and further reading

  • Training and test must be processed in the same way
  • Test transformations will likely be imperfect
  • Especially if the test/training sets were collected at different times
  • Careful when transforming factor variables!
  • preprocessing with caret

Covariate creation

Two levels of covariate creation

Level 1: From raw data to covariate

Level 2: Transforming tidy covariates

library(kernlab)
data(spam)
# Derived covariate: the square of capitalAve, stored as a new column.
spam$capitalAveSq <- spam$capitalAve^2

Level 1, Raw data -> covariates

  • Depends heavily on application
  • The balancing act is summarization vs. information loss
  • Examples:
  • Text files: frequency of words, frequency of phrases (Google ngrams), frequency of capital letters.
  • Images: Edges, corners, blobs, ridges (computer vision feature detection)
  • Webpages: Number and type of images, position of elements, colors, videos (A/B Testing)
  • People: Height, weight, hair color, sex, country of origin.
  • The more knowledge of the system you have the better the job you will do.
  • When in doubt, err on the side of more features
  • Can be automated, but use caution!

Level 2, Tidy covariates -> new covariates

  • More necessary for some methods (regression, svms) than for others (classification trees).
  • Should be done only on the training set
  • The best approach is through exploratory analysis (plotting/tables)
  • New covariates should be added to data frames

Load example data

library(ISLR)
library(caret)
data(Wage)

# p = 0.7 of the rows go to training; the remaining rows go to testing.
inTrain <- createDataPartition(y = Wage$wage, p = 0.7, list = FALSE)
training <- Wage[inTrain, ]
testing <- Wage[-inTrain, ]

Common covariates to add, dummy variables

Basic idea - convert factor variables to indicator variables

# Count the training rows in each jobclass level.
table(training$jobclass)
## 
##  1. Industrial 2. Information 
##           1051           1051
# Build the indicator-variable encoding for jobclass and preview the first
# rows of the encoded training data.
dummies <- dummyVars(wage ~ jobclass, data = training)
head(predict(dummies, newdata = training))
##        jobclass.1. Industrial jobclass.2. Information
## 86582                       0                       1
## 161300                      1                       0
## 155159                      0                       1
## 11443                       0                       1
## 376662                      0                       1
## 450601                      1                       0

Removing zero covariates

# Per-column diagnostics (freqRatio, percentUnique, zeroVar, nzv), returned as
# a table because saveMetrics = TRUE.
nsv <- nearZeroVar(training, saveMetrics = TRUE)
nsv
##            freqRatio percentUnique zeroVar   nzv
## year        1.037356    0.33301618   FALSE FALSE
## age         1.027027    2.85442436   FALSE FALSE
## sex         0.000000    0.04757374    TRUE  TRUE
## maritl      3.272931    0.23786870   FALSE FALSE
## race        8.938776    0.19029496   FALSE FALSE
## education   1.389002    0.23786870   FALSE FALSE
## region      0.000000    0.04757374    TRUE  TRUE
## jobclass    1.000000    0.09514748   FALSE FALSE
## health      2.468647    0.09514748   FALSE FALSE
## health_ins  2.352472    0.09514748   FALSE FALSE
## logwage     1.061728   19.17221694   FALSE FALSE
## wage        1.061728   19.17221694   FALSE FALSE

Spline basis

library(splines)
# B-spline basis for age with df = 3; the printed matrix has three columns.
bsBasis <- bs(training$age, df = 3)
bsBasis
##                   1            2            3
##    [1,] 0.236850055 0.0253767916 9.063140e-04
##    [2,] 0.416337988 0.3211750193 8.258786e-02
##    [3,] 0.430813836 0.2910904300 6.556091e-02
##    [4,] 0.362525595 0.3866939680 1.374912e-01
##    [5,] 0.306334128 0.4241549461 1.957638e-01
##    [6,] 0.424154946 0.3063341278 7.374710e-02
##    [7,] 0.377630828 0.0906313987 7.250512e-03
##    [8,] 0.444358195 0.2275981001 3.885821e-02
##    [9,] 0.442218287 0.1953987782 2.877966e-02
##   [10,] 0.362525595 0.3866939680 1.374912e-01
##   [11,] 0.275519452 0.4362391326 2.302373e-01
##   [12,] 0.444093854 0.2114732637 3.356718e-02
##   [13,] 0.443086838 0.2436977611 4.467792e-02
##   [14,] 0.375000000 0.3750000000 1.250000e-01
##   [15,] 0.430813836 0.2910904300 6.556091e-02
##   [16,] 0.426168977 0.1482326877 1.718640e-02
##   [17,] 0.000000000 0.0000000000 0.000000e+00
##   [18,] 0.291090430 0.4308138364 2.125348e-01
##   [19,] 0.349346279 0.3975319727 1.507880e-01
##   [20,] 0.417093250 0.1331148669 1.416116e-02
##   [21,] 0.426168977 0.1482326877 1.718640e-02
##   [22,] 0.438655970 0.1794501695 2.447048e-02
##   [23,] 0.275519452 0.4362391326 2.302373e-01
##   [24,] 0.266544426 0.0339238361 1.439193e-03
##   [25,] 0.406028666 0.1184250277 1.151354e-02
##   [26,] 0.318229499 0.0540389715 3.058810e-03
##   [27,] 0.340371253 0.0654560102 4.195898e-03
##   [28,] 0.318229499 0.0540389715 3.058810e-03
##   [29,] 0.430813836 0.2910904300 6.556091e-02
##   [30,] 0.362525595 0.3866939680 1.374912e-01
##   [31,] 0.444358195 0.2275981001 3.885821e-02
##   [32,] 0.259696720 0.4403553087 2.488965e-01
##   [33,] 0.266544426 0.0339238361 1.439193e-03
##   [34,] 0.430813836 0.2910904300 6.556091e-02
##   [35,] 0.204487093 0.0179374643 5.244873e-04
##   [36,] 0.377630828 0.0906313987 7.250512e-03
##   [37,] 0.195398778 0.4422182874 3.336033e-01
##   [38,] 0.426168977 0.1482326877 1.718640e-02
##   [39,] 0.077678661 0.3601465208 5.565901e-01
##   [40,] 0.386693968 0.3625255950 1.132892e-01
##   [41,] 0.375000000 0.3750000000 1.250000e-01
##   [42,] 0.436239133 0.2755194522 5.800410e-02
##   [43,] 0.442218287 0.1953987782 2.877966e-02
##   [44,] 0.131453291 0.0066840657 1.132892e-04
##   [45,] 0.243697761 0.4430868383 2.685375e-01
##   [46,] 0.266544426 0.0339238361 1.439193e-03
##   [47,] 0.443086838 0.2436977611 4.467792e-02
##   [48,] 0.424154946 0.3063341278 7.374710e-02
##   [49,] 0.424154946 0.3063341278 7.374710e-02
##   [50,] 0.195398778 0.4422182874 3.336033e-01
##   [51,] 0.291090430 0.4308138364 2.125348e-01
##   [52,] 0.436239133 0.2755194522 5.800410e-02
##   [53,] 0.266544426 0.0339238361 1.439193e-03
##   [54,] 0.321175019 0.4163379880 1.798991e-01
##   [55,] 0.397531973 0.3493462791 1.023338e-01
##   [56,] 0.407438488 0.3355375785 9.210835e-02
##   [57,] 0.426168977 0.1482326877 1.718640e-02
##   [58,] 0.169380014 0.0116813803 2.685375e-04
##   [59,] 0.416337988 0.3211750193 8.258786e-02
##   [60,] 0.179450170 0.4386559699 3.574234e-01
##   [61,] 0.306334128 0.4241549461 1.957638e-01
##   [62,] 0.426168977 0.1482326877 1.718640e-02
##   [63,] 0.362525595 0.3866939680 1.374912e-01
##   [64,] 0.407438488 0.3355375785 9.210835e-02
##   [65,] 0.440355309 0.2596967205 5.105149e-02
##   [66,] 0.444093854 0.2114732637 3.356718e-02
##   [67,] 0.433331375 0.1637029640 2.061445e-02
##   [68,] 0.118425028 0.4060286664 4.640328e-01
##   [69,] 0.442218287 0.1953987782 2.877966e-02
##   [70,] 0.444358195 0.2275981001 3.885821e-02
##   [71,] 0.436239133 0.2755194522 5.800410e-02
##   [72,] 0.349346279 0.3975319727 1.507880e-01
##   [73,] 0.444093854 0.2114732637 3.356718e-02
##   [74,] 0.375000000 0.3750000000 1.250000e-01
##   [75,] 0.436239133 0.2755194522 5.800410e-02
##   [76,] 0.430813836 0.2910904300 6.556091e-02
##   [77,] 0.227598100 0.4443581954 2.891855e-01
##   [78,] 0.259696720 0.4403553087 2.488965e-01
##   [79,] 0.266544426 0.0339238361 1.439193e-03
##   [80,] 0.375000000 0.3750000000 1.250000e-01
##   [81,] 0.444093854 0.2114732637 3.356718e-02
##   [82,] 0.195398778 0.4422182874 3.336033e-01
##   [83,] 0.335537578 0.4074384881 1.649156e-01
##   [84,] 0.211473264 0.4440938538 3.108657e-01
##   [85,] 0.407438488 0.3355375785 9.210835e-02
##   [86,] 0.131453291 0.0066840657 1.132892e-04
##   [87,] 0.195398778 0.4422182874 3.336033e-01
##   [88,] 0.406028666 0.1184250277 1.151354e-02
##   [89,] 0.243697761 0.4430868383 2.685375e-01
##   [90,] 0.406028666 0.1184250277 1.151354e-02
##   [91,] 0.169380014 0.0116813803 2.685375e-04
##   [92,] 0.349346279 0.3975319727 1.507880e-01
##   [93,] 0.424154946 0.3063341278 7.374710e-02
##   [94,] 0.211473264 0.4440938538 3.108657e-01
##   [95,] 0.443086838 0.2436977611 4.467792e-02
##   [96,] 0.433331375 0.1637029640 2.061445e-02
##   [97,] 0.433331375 0.1637029640 2.061445e-02
##   [98,] 0.211473264 0.4440938538 3.108657e-01
##   [99,] 0.444093854 0.2114732637 3.356718e-02
##  [100,] 0.321175019 0.4163379880 1.798991e-01
##  [101,] 0.259696720 0.4403553087 2.488965e-01
##  [102,] 0.148232688 0.4261689772 4.084119e-01
##  [103,] 0.433331375 0.1637029640 2.061445e-02
##  [104,] 0.306334128 0.4241549461 1.957638e-01
##  [105,] 0.416337988 0.3211750193 8.258786e-02
##  [106,] 0.243697761 0.4430868383 2.685375e-01
##  [107,] 0.386693968 0.3625255950 1.132892e-01
##  [108,] 0.407438488 0.3355375785 9.210835e-02
##  [109,] 0.407438488 0.3355375785 9.210835e-02
##  [110,] 0.291090430 0.4308138364 2.125348e-01
##  [111,] 0.349346279 0.3975319727 1.507880e-01
##  [112,] 0.375000000 0.3750000000 1.250000e-01
##  [113,] 0.426168977 0.1482326877 1.718640e-02
##  [114,] 0.321175019 0.4163379880 1.798991e-01
##  [115,] 0.443086838 0.2436977611 4.467792e-02
##  [116,] 0.362525595 0.3866939680 1.374912e-01
##  [117,] 0.444358195 0.2275981001 3.885821e-02
##  [118,] 0.335537578 0.4074384881 1.649156e-01
##  [119,] 0.362525595 0.3866939680 1.374912e-01
##  [120,] 0.386693968 0.3625255950 1.132892e-01
##  [121,] 0.397531973 0.3493462791 1.023338e-01
##  [122,] 0.444358195 0.2275981001 3.885821e-02
##  [123,] 0.424154946 0.3063341278 7.374710e-02
##  [124,] 0.442218287 0.1953987782 2.877966e-02
##  [125,] 0.335537578 0.4074384881 1.649156e-01
##  [126,] 0.293645732 0.0435030714 2.148300e-03
##  [127,] 0.392899701 0.1042386963 9.218388e-03
##  [128,] 0.243697761 0.4430868383 2.685375e-01
##  [129,] 0.377630828 0.0906313987 7.250512e-03
##  [130,] 0.318229499 0.0540389715 3.058810e-03
##  [131,] 0.443086838 0.2436977611 4.467792e-02
##  [132,] 0.291090430 0.4308138364 2.125348e-01
##  [133,] 0.433331375 0.1637029640 2.061445e-02
##  [134,] 0.360146521 0.0776786613 5.584740e-03
##  [135,] 0.266544426 0.0339238361 1.439193e-03
##  [136,] 0.443086838 0.2436977611 4.467792e-02
##  [137,] 0.318229499 0.0540389715 3.058810e-03
##  [138,] 0.375000000 0.3750000000 1.250000e-01
##  [139,] 0.169380014 0.0116813803 2.685375e-04
##  [140,] 0.375000000 0.3750000000 1.250000e-01
##  [141,] 0.266544426 0.0339238361 1.439193e-03
##  [142,] 0.360146521 0.0776786613 5.584740e-03
##  [143,] 0.442218287 0.1953987782 2.877966e-02
##  [144,] 0.433331375 0.1637029640 2.061445e-02
##  [145,] 0.243697761 0.4430868383 2.685375e-01
##  [146,] 0.444358195 0.2275981001 3.885821e-02
##  [147,] 0.440355309 0.2596967205 5.105149e-02
##  [148,] 0.442218287 0.1953987782 2.877966e-02
##  [149,] 0.179450170 0.4386559699 3.574234e-01
##  [150,] 0.318229499 0.0540389715 3.058810e-03
##  [151,] 0.442218287 0.1953987782 2.877966e-02
##  [152,] 0.275519452 0.4362391326 2.302373e-01
##  [153,] 0.438655970 0.1794501695 2.447048e-02
##  [154,] 0.204487093 0.0179374643 5.244873e-04
##  [155,] 0.407438488 0.3355375785 9.210835e-02
##  [156,] 0.293645732 0.0435030714 2.148300e-03
##  [157,] 0.430813836 0.2910904300 6.556091e-02
##  [158,] 0.438655970 0.1794501695 2.447048e-02
##  [159,] 0.306334128 0.4241549461 1.957638e-01
##  [160,] 0.443086838 0.2436977611 4.467792e-02
##  [161,] 0.426168977 0.1482326877 1.718640e-02
##  [162,] 0.430813836 0.2910904300 6.556091e-02
##  [163,] 0.227598100 0.4443581954 2.891855e-01
##  [164,] 0.211473264 0.4440938538 3.108657e-01
##  [165,] 0.375000000 0.3750000000 1.250000e-01
##  [166,] 0.416337988 0.3211750193 8.258786e-02
##  [167,] 0.426168977 0.1482326877 1.718640e-02
##  [168,] 0.169380014 0.0116813803 2.685375e-04
##  [169,] 0.443086838 0.2436977611 4.467792e-02
##  [170,] 0.440355309 0.2596967205 5.105149e-02
##  [171,] 0.438655970 0.1794501695 2.447048e-02
##  [172,] 0.397531973 0.3493462791 1.023338e-01
##  [173,] 0.433331375 0.1637029640 2.061445e-02
##  [174,] 0.443086838 0.2436977611 4.467792e-02
##  [175,] 0.259696720 0.4403553087 2.488965e-01
##  [176,] 0.033923836 0.2665444262 6.980925e-01
##  [177,] 0.360146521 0.0776786613 5.584740e-03
##  [178,] 0.377630828 0.0906313987 7.250512e-03
##  [179,] 0.360146521 0.0776786613 5.584740e-03
##  [180,] 0.438655970 0.1794501695 2.447048e-02
##  [181,] 0.444358195 0.2275981001 3.885821e-02
##  [182,] 0.386693968 0.3625255950 1.132892e-01
##  [183,] 0.416337988 0.3211750193 8.258786e-02
##  [184,] 0.362525595 0.3866939680 1.374912e-01
##  [185,] 0.243697761 0.4430868383 2.685375e-01
##  [186,] 0.386693968 0.3625255950 1.132892e-01
##  [187,] 0.440355309 0.2596967205 5.105149e-02
##  [188,] 0.318229499 0.0540389715 3.058810e-03
##  [189,] 0.424154946 0.3063341278 7.374710e-02
##  [190,] 0.406028666 0.1184250277 1.151354e-02
##  [191,] 0.407438488 0.3355375785 9.210835e-02
##  [192,] 0.169380014 0.0116813803 2.685375e-04
##  [193,] 0.321175019 0.4163379880 1.798991e-01
##  [194,] 0.426168977 0.1482326877 1.718640e-02
##  [195,] 0.444093854 0.2114732637 3.356718e-02
##  [196,] 0.266544426 0.0339238361 1.439193e-03
##  [197,] 0.360146521 0.0776786613 5.584740e-03
##  [198,] 0.340371253 0.0654560102 4.195898e-03
##  [199,] 0.291090430 0.4308138364 2.125348e-01
##  [200,] 0.275519452 0.4362391326 2.302373e-01
##  [201,] 0.195398778 0.4422182874 3.336033e-01
##  [202,] 0.397531973 0.3493462791 1.023338e-01
##  [203,] 0.335537578 0.4074384881 1.649156e-01
##  [204,] 0.417093250 0.1331148669 1.416116e-02
##  [205,] 0.243697761 0.4430868383 2.685375e-01
##  [206,] 0.318229499 0.0540389715 3.058810e-03
##  [207,] 0.335537578 0.4074384881 1.649156e-01
##  [208,] 0.416337988 0.3211750193 8.258786e-02
##  [209,] 0.169380014 0.0116813803 2.685375e-04
##  [210,] 0.266544426 0.0339238361 1.439193e-03
##  [211,] 0.438655970 0.1794501695 2.447048e-02
##  [212,] 0.392899701 0.1042386963 9.218388e-03
##  [213,] 0.335537578 0.4074384881 1.649156e-01
##  [214,] 0.407438488 0.3355375785 9.210835e-02
##  [215,] 0.416337988 0.3211750193 8.258786e-02
##  [216,] 0.443086838 0.2436977611 4.467792e-02
##  [217,] 0.436239133 0.2755194522 5.800410e-02
##  [218,] 0.440355309 0.2596967205 5.105149e-02
##  [219,] 0.266544426 0.0339238361 1.439193e-03
##  [220,] 0.236850055 0.0253767916 9.063140e-04
##  [221,] 0.349346279 0.3975319727 1.507880e-01
##  [222,] 0.440355309 0.2596967205 5.105149e-02
##  [223,] 0.377630828 0.0906313987 7.250512e-03
##  [224,] 0.291090430 0.4308138364 2.125348e-01
##  [225,] 0.204487093 0.0179374643 5.244873e-04
##  [226,] 0.211473264 0.4440938538 3.108657e-01
##  [227,] 0.443086838 0.2436977611 4.467792e-02
##  [228,] 0.000000000 0.0000000000 1.000000e+00
##  [229,] 0.443086838 0.2436977611 4.467792e-02
##  [230,] 0.433331375 0.1637029640 2.061445e-02
##  [231,] 0.291090430 0.4308138364 2.125348e-01
##  [232,] 0.236850055 0.0253767916 9.063140e-04
##  [233,] 0.444358195 0.2275981001 3.885821e-02
##  [234,] 0.377630828 0.0906313987 7.250512e-03
##  [235,] 0.090631399 0.3776308281 5.244873e-01
##  [236,] 0.306334128 0.4241549461 1.957638e-01
##  [237,] 0.318229499 0.0540389715 3.058810e-03
##  [238,] 0.426168977 0.1482326877 1.718640e-02
##  [239,] 0.321175019 0.4163379880 1.798991e-01
##  [240,] 0.227598100 0.4443581954 2.891855e-01
##  [241,] 0.416337988 0.3211750193 8.258786e-02
##  [242,] 0.430813836 0.2910904300 6.556091e-02
##  [243,] 0.377630828 0.0906313987 7.250512e-03
##  [244,] 0.436239133 0.2755194522 5.800410e-02
##  [245,] 0.204487093 0.0179374643 5.244873e-04
##  [246,] 0.243697761 0.4430868383 2.685375e-01
##  [247,] 0.417093250 0.1331148669 1.416116e-02
##  [248,] 0.275519452 0.4362391326 2.302373e-01
##  [249,] 0.442218287 0.1953987782 2.877966e-02
##  [250,] 0.417093250 0.1331148669 1.416116e-02
##  [251,] 0.362525595 0.3866939680 1.374912e-01
##  [252,] 0.430813836 0.2910904300 6.556091e-02
##  [253,] 0.321175019 0.4163379880 1.798991e-01
##  [254,] 0.442218287 0.1953987782 2.877966e-02
##  [255,] 0.090631399 0.0030210466 3.356718e-05
##  [256,] 0.293645732 0.0435030714 2.148300e-03
##  [257,] 0.360146521 0.0776786613 5.584740e-03
##  [258,] 0.259696720 0.4403553087 2.488965e-01
##  [259,] 0.397531973 0.3493462791 1.023338e-01
##  [260,] 0.444093854 0.2114732637 3.356718e-02
##  [261,] 0.204487093 0.0179374643 5.244873e-04
##  [262,] 0.392899701 0.1042386963 9.218388e-03
##  [263,] 0.430813836 0.2910904300 6.556091e-02
##  [264,] 0.417093250 0.1331148669 1.416116e-02
##  [265,] 0.386693968 0.3625255950 1.132892e-01
##  [266,] 0.377630828 0.0906313987 7.250512e-03
##  [267,] 0.424154946 0.3063341278 7.374710e-02
##  [268,] 0.444093854 0.2114732637 3.356718e-02
##  [269,] 0.397531973 0.3493462791 1.023338e-01
##  [270,] 0.340371253 0.0654560102 4.195898e-03
##  [271,] 0.204487093 0.0179374643 5.244873e-04
##  [272,] 0.318229499 0.0540389715 3.058810e-03
##  [273,] 0.417093250 0.1331148669 1.416116e-02
##  [274,] 0.375000000 0.3750000000 1.250000e-01
##  [275,] 0.318229499 0.0540389715 3.058810e-03
##  [276,] 0.386693968 0.3625255950 1.132892e-01
##  [277,] 0.444093854 0.2114732637 3.356718e-02
##  [278,] 0.243697761 0.4430868383 2.685375e-01
##  [279,] 0.407438488 0.3355375785 9.210835e-02
##  [280,] 0.321175019 0.4163379880 1.798991e-01
##  [281,] 0.436239133 0.2755194522 5.800410e-02
##  [282,] 0.443086838 0.2436977611 4.467792e-02
##  [283,] 0.433331375 0.1637029640 2.061445e-02
##  [284,] 0.362525595 0.3866939680 1.374912e-01
##  [285,] 0.426168977 0.1482326877 1.718640e-02
##  [286,] 0.386693968 0.3625255950 1.132892e-01
##  [287,] 0.375000000 0.3750000000 1.250000e-01
##  [288,] 0.440355309 0.2596967205 5.105149e-02
##  [289,] 0.243697761 0.4430868383 2.685375e-01
##  [290,] 0.362525595 0.3866939680 1.374912e-01
##  [291,] 0.444093854 0.2114732637 3.356718e-02
##  [292,] 0.377630828 0.0906313987 7.250512e-03
##  [293,] 0.424154946 0.3063341278 7.374710e-02
##  [294,] 0.243697761 0.4430868383 2.685375e-01
##  [295,] 0.416337988 0.3211750193 8.258786e-02
##  [296,] 0.424154946 0.3063341278 7.374710e-02
##  [297,] 0.416337988 0.3211750193 8.258786e-02
##  [298,] 0.349346279 0.3975319727 1.507880e-01
##  [299,] 0.195398778 0.4422182874 3.336033e-01
##  [300,] 0.416337988 0.3211750193 8.258786e-02
##  [301,] 0.426168977 0.1482326877 1.718640e-02
##  [302,] 0.417093250 0.1331148669 1.416116e-02
##  [303,] 0.362525595 0.3866939680 1.374912e-01
##  [304,] 0.362525595 0.3866939680 1.374912e-01
##  [305,] 0.444358195 0.2275981001 3.885821e-02
##  [306,] 0.293645732 0.0435030714 2.148300e-03
##  [307,] 0.375000000 0.3750000000 1.250000e-01
##  [308,] 0.392899701 0.1042386963 9.218388e-03
##  [309,] 0.275519452 0.4362391326 2.302373e-01
##  [310,] 0.195398778 0.4422182874 3.336033e-01
##  [311,] 0.275519452 0.4362391326 2.302373e-01
##  [312,] 0.349346279 0.3975319727 1.507880e-01
##  [313,] 0.436239133 0.2755194522 5.800410e-02
##  [314,] 0.416337988 0.3211750193 8.258786e-02
##  [315,] 0.386693968 0.3625255950 1.132892e-01
##  [316,] 0.417093250 0.1331148669 1.416116e-02
##  [317,] 0.392899701 0.1042386963 9.218388e-03
##  [318,] 0.386693968 0.3625255950 1.132892e-01
##  [319,] 0.433331375 0.1637029640 2.061445e-02
##  [320,] 0.397531973 0.3493462791 1.023338e-01
##  [321,] 0.416337988 0.3211750193 8.258786e-02
##  [322,] 0.407438488 0.3355375785 9.210835e-02
##  [323,] 0.397531973 0.3493462791 1.023338e-01
##  [324,] 0.375000000 0.3750000000 1.250000e-01
##  [325,] 0.438655970 0.1794501695 2.447048e-02
##  [326,] 0.349346279 0.3975319727 1.507880e-01
##  [327,] 0.407438488 0.3355375785 9.210835e-02
##  [328,] 0.430813836 0.2910904300 6.556091e-02
##  [329,] 0.424154946 0.3063341278 7.374710e-02
##  [330,] 0.195398778 0.4422182874 3.336033e-01
##  [331,] 0.442218287 0.1953987782 2.877966e-02
##  [332,] 0.444093854 0.2114732637 3.356718e-02
##  [333,] 0.440355309 0.2596967205 5.105149e-02
##  [334,] 0.377630828 0.0906313987 7.250512e-03
##  [335,] 0.349346279 0.3975319727 1.507880e-01
##  [336,] 0.433331375 0.1637029640 2.061445e-02
##  [337,] 0.318229499 0.0540389715 3.058810e-03
##  [338,] 0.349346279 0.3975319727 1.507880e-01
##  [339,] 0.440355309 0.2596967205 5.105149e-02
##  [340,] 0.163702964 0.4333313752 3.823512e-01
##  [341,] 0.340371253 0.0654560102 4.195898e-03
##  [342,] 0.362525595 0.3866939680 1.374912e-01
##  [343,] 0.440355309 0.2596967205 5.105149e-02
##  [344,] 0.204487093 0.0179374643 5.244873e-04
##  [345,] 0.416337988 0.3211750193 8.258786e-02
##  [346,] 0.163702964 0.4333313752 3.823512e-01
##  [347,] 0.227598100 0.4443581954 2.891855e-01
##  [348,] 0.377630828 0.0906313987 7.250512e-03
##  [349,] 0.416337988 0.3211750193 8.258786e-02
##  [350,] 0.335537578 0.4074384881 1.649156e-01
##  [351,] 0.306334128 0.4241549461 1.957638e-01
##  [352,] 0.377630828 0.0906313987 7.250512e-03
##  [353,] 0.397531973 0.3493462791 1.023338e-01
##  [354,] 0.397531973 0.3493462791 1.023338e-01
##  [355,] 0.444358195 0.2275981001 3.885821e-02
##  [356,] 0.362525595 0.3866939680 1.374912e-01
##  [357,] 0.397531973 0.3493462791 1.023338e-01
##  [358,] 0.416337988 0.3211750193 8.258786e-02
##  [359,] 0.424154946 0.3063341278 7.374710e-02
##  [360,] 0.436239133 0.2755194522 5.800410e-02
##  [361,] 0.275519452 0.4362391326 2.302373e-01
##  [362,] 0.362525595 0.3866939680 1.374912e-01
##  [363,] 0.321175019 0.4163379880 1.798991e-01
##  [364,] 0.444093854 0.2114732637 3.356718e-02
##  [365,] 0.275519452 0.4362391326 2.302373e-01
##  [366,] 0.362525595 0.3866939680 1.374912e-01
##  [367,] 0.375000000 0.3750000000 1.250000e-01
##  [368,] 0.436239133 0.2755194522 5.800410e-02
##  [369,] 0.362525595 0.3866939680 1.374912e-01
##  [370,] 0.321175019 0.4163379880 1.798991e-01
##  [371,] 0.340371253 0.0654560102 4.195898e-03
##  [372,] 0.416337988 0.3211750193 8.258786e-02
##  [373,] 0.236850055 0.0253767916 9.063140e-04
##  [374,] 0.266544426 0.0339238361 1.439193e-03
##  [375,] 0.397531973 0.3493462791 1.023338e-01
##  [376,] 0.444093854 0.2114732637 3.356718e-02
##  [377,] 0.417093250 0.1331148669 1.416116e-02
##  [378,] 0.444358195 0.2275981001 3.885821e-02
##  [379,] 0.407438488 0.3355375785 9.210835e-02
##  [380,] 0.195398778 0.4422182874 3.336033e-01
##  [381,] 0.406028666 0.1184250277 1.151354e-02
##  [382,] 0.195398778 0.4422182874 3.336033e-01
##  [383,] 0.416337988 0.3211750193 8.258786e-02
##  [384,] 0.243697761 0.4430868383 2.685375e-01
##  [385,] 0.266544426 0.0339238361 1.439193e-03
##  [386,] 0.426168977 0.1482326877 1.718640e-02
##  [387,] 0.424154946 0.3063341278 7.374710e-02
##  [388,] 0.148232688 0.4261689772 4.084119e-01
##  [389,] 0.306334128 0.4241549461 1.957638e-01
##  [390,] 0.436239133 0.2755194522 5.800410e-02
##  [391,] 0.392899701 0.1042386963 9.218388e-03
##  [392,] 0.266544426 0.0339238361 1.439193e-03
##  [393,] 0.349346279 0.3975319727 1.507880e-01
##  [394,] 0.340371253 0.0654560102 4.195898e-03
##  [395,] 0.321175019 0.4163379880 1.798991e-01
##  [396,] 0.407438488 0.3355375785 9.210835e-02
##  [397,] 0.444093854 0.2114732637 3.356718e-02
##  [398,] 0.444358195 0.2275981001 3.885821e-02
##  [399,] 0.442218287 0.1953987782 2.877966e-02
##  [400,] 0.227598100 0.4443581954 2.891855e-01
##  [401,] 0.417093250 0.1331148669 1.416116e-02
##  [402,] 0.204487093 0.0179374643 5.244873e-04
##  [403,] 0.442218287 0.1953987782 2.877966e-02
##  [404,] 0.318229499 0.0540389715 3.058810e-03
##  [405,] 0.397531973 0.3493462791 1.023338e-01
##  [406,] 0.335537578 0.4074384881 1.649156e-01
##  [407,] 0.442218287 0.1953987782 2.877966e-02
##  [408,] 0.426168977 0.1482326877 1.718640e-02
##  [409,] 0.349346279 0.3975319727 1.507880e-01
##  [410,] 0.362525595 0.3866939680 1.374912e-01
##  [411,] 0.306334128 0.4241549461 1.957638e-01
##  [412,] 0.362525595 0.3866939680 1.374912e-01
##  [413,] 0.406028666 0.1184250277 1.151354e-02
##  [414,] 0.442218287 0.1953987782 2.877966e-02
##  [415,] 0.046838810 0.0007678494 4.195898e-06
##  [416,] 0.406028666 0.1184250277 1.151354e-02
##  [417,] 0.436239133 0.2755194522 5.800410e-02
##  [418,] 0.430813836 0.2910904300 6.556091e-02
##  [419,] 0.424154946 0.3063341278 7.374710e-02
##  [420,] 0.443086838 0.2436977611 4.467792e-02
##  [421,] 0.430813836 0.2910904300 6.556091e-02
##  [422,] 0.406028666 0.1184250277 1.151354e-02
##  [423,] 0.195398778 0.4422182874 3.336033e-01
##  [424,] 0.397531973 0.3493462791 1.023338e-01
##  [425,] 0.291090430 0.4308138364 2.125348e-01
##  [426,] 0.335537578 0.4074384881 1.649156e-01
##  [427,] 0.318229499 0.0540389715 3.058810e-03
##  [428,] 0.169380014 0.0116813803 2.685375e-04
##  [429,] 0.436239133 0.2755194522 5.800410e-02
##  [430,] 0.392899701 0.1042386963 9.218388e-03
##  [431,] 0.227598100 0.4443581954 2.891855e-01
##  [432,] 0.438655970 0.1794501695 2.447048e-02
##  [433,] 0.406028666 0.1184250277 1.151354e-02
##  [434,] 0.406028666 0.1184250277 1.151354e-02
##  [435,] 0.266544426 0.0339238361 1.439193e-03
##  [436,] 0.430813836 0.2910904300 6.556091e-02
##  [437,] 0.424154946 0.3063341278 7.374710e-02
##  [438,] 0.259696720 0.4403553087 2.488965e-01
##  [439,] 0.440355309 0.2596967205 5.105149e-02
##  [440,] 0.444093854 0.2114732637 3.356718e-02
##  [441,] 0.243697761 0.4430868383 2.685375e-01
##  [442,] 0.227598100 0.4443581954 2.891855e-01
##  [443,] 0.444358195 0.2275981001 3.885821e-02
##  [444,] 0.424154946 0.3063341278 7.374710e-02
##  [445,] 0.065456010 0.3403712531 5.899768e-01
##  [446,] 0.318229499 0.0540389715 3.058810e-03
##  [447,] 0.397531973 0.3493462791 1.023338e-01
##  [448,] 0.360146521 0.0776786613 5.584740e-03
##  [449,] 0.436239133 0.2755194522 5.800410e-02
##  [450,] 0.349346279 0.3975319727 1.507880e-01
##  [451,] 0.444358195 0.2275981001 3.885821e-02
##  [452,] 0.204487093 0.0179374643 5.244873e-04
##  [453,] 0.392899701 0.1042386963 9.218388e-03
##  [454,] 0.227598100 0.4443581954 2.891855e-01
##  [455,] 0.436239133 0.2755194522 5.800410e-02
##  [456,] 0.433331375 0.1637029640 2.061445e-02
##  [457,] 0.444093854 0.2114732637 3.356718e-02
##  [458,] 0.416337988 0.3211750193 8.258786e-02
##  [459,] 0.243697761 0.4430868383 2.685375e-01
##  [460,] 0.293645732 0.0435030714 2.148300e-03
##  [461,] 0.377630828 0.0906313987 7.250512e-03
##  [462,] 0.306334128 0.4241549461 1.957638e-01
##  [463,] 0.335537578 0.4074384881 1.649156e-01
##  [464,] 0.033923836 0.2665444262 6.980925e-01
##  [465,] 0.133114867 0.4170932496 4.356307e-01
##  [466,] 0.321175019 0.4163379880 1.798991e-01
##  [467,] 0.335537578 0.4074384881 1.649156e-01
##  [468,] 0.259696720 0.4403553087 2.488965e-01
##  [469,] 0.406028666 0.1184250277 1.151354e-02
##  [470,] 0.349346279 0.3975319727 1.507880e-01
##  [471,] 0.430813836 0.2910904300 6.556091e-02
##  [472,] 0.362525595 0.3866939680 1.374912e-01
##  [473,] 0.321175019 0.4163379880 1.798991e-01
##  [474,] 0.306334128 0.4241549461 1.957638e-01
##  [475,] 0.443086838 0.2436977611 4.467792e-02
##  [476,] 0.377630828 0.0906313987 7.250512e-03
##  [477,] 0.416337988 0.3211750193 8.258786e-02
##  [478,] 0.291090430 0.4308138364 2.125348e-01
##  [479,] 0.416337988 0.3211750193 8.258786e-02
##  [480,] 0.424154946 0.3063341278 7.374710e-02
##  [481,] 0.442218287 0.1953987782 2.877966e-02
##  [482,] 0.440355309 0.2596967205 5.105149e-02
##  [483,] 0.335537578 0.4074384881 1.649156e-01
##  [484,] 0.291090430 0.4308138364 2.125348e-01
##  [485,] 0.430813836 0.2910904300 6.556091e-02
##  [486,] 0.318229499 0.0540389715 3.058810e-03
##  [487,] 0.430813836 0.2910904300 6.556091e-02
##  [488,] 0.407438488 0.3355375785 9.210835e-02
##  [489,] 0.386693968 0.3625255950 1.132892e-01
##  [490,] 0.360146521 0.0776786613 5.584740e-03
##  [491,] 0.236850055 0.0253767916 9.063140e-04
##  [492,] 0.362525595 0.3866939680 1.374912e-01
##  [493,] 0.236850055 0.0253767916 9.063140e-04
##  [494,] 0.436239133 0.2755194522 5.800410e-02
##  [495,] 0.375000000 0.3750000000 1.250000e-01
##  [496,] 0.443086838 0.2436977611 4.467792e-02
##  [497,] 0.440355309 0.2596967205 5.105149e-02
##  [498,] 0.426168977 0.1482326877 1.718640e-02
##  [499,] 0.236850055 0.0253767916 9.063140e-04
##  [500,] 0.424154946 0.3063341278 7.374710e-02
##  [501,] 0.266544426 0.0339238361 1.439193e-03
##  [502,] 0.443086838 0.2436977611 4.467792e-02
##  [503,] 0.266544426 0.0339238361 1.439193e-03
##  [504,] 0.424154946 0.3063341278 7.374710e-02
##  [505,] 0.243697761 0.4430868383 2.685375e-01
##  [506,] 0.335537578 0.4074384881 1.649156e-01
##  [507,] 0.211473264 0.4440938538 3.108657e-01
##  [508,] 0.349346279 0.3975319727 1.507880e-01
##  [509,] 0.416337988 0.3211750193 8.258786e-02
##  [510,] 0.430813836 0.2910904300 6.556091e-02
##  [511,] 0.416337988 0.3211750193 8.258786e-02
##  [512,] 0.443086838 0.2436977611 4.467792e-02
##  [513,] 0.349346279 0.3975319727 1.507880e-01
##  [514,] 0.335537578 0.4074384881 1.649156e-01
##  [515,] 0.392899701 0.1042386963 9.218388e-03
##  [516,] 0.443086838 0.2436977611 4.467792e-02
##  [517,] 0.293645732 0.0435030714 2.148300e-03
##  [518,] 0.375000000 0.3750000000 1.250000e-01
##  [519,] 0.444093854 0.2114732637 3.356718e-02
##  [520,] 0.362525595 0.3866939680 1.374912e-01
##  [521,] 0.360146521 0.0776786613 5.584740e-03
##  [522,] 0.417093250 0.1331148669 1.416116e-02
##  [523,] 0.179450170 0.4386559699 3.574234e-01
##  [524,] 0.416337988 0.3211750193 8.258786e-02
##  [525,] 0.275519452 0.4362391326 2.302373e-01
##  [526,] 0.243697761 0.4430868383 2.685375e-01
##  [527,] 0.444358195 0.2275981001 3.885821e-02
##  [528,] 0.375000000 0.3750000000 1.250000e-01
##  [529,] 0.236850055 0.0253767916 9.063140e-04
##  [530,] 0.243697761 0.4430868383 2.685375e-01
##  [531,] 0.397531973 0.3493462791 1.023338e-01
##  [532,] 0.440355309 0.2596967205 5.105149e-02
##  [533,] 0.054038972 0.3182294988 6.246727e-01
##  [534,] 0.397531973 0.3493462791 1.023338e-01
##  [535,] 0.444093854 0.2114732637 3.356718e-02
##  [536,] 0.392899701 0.1042386963 9.218388e-03
##  [537,] 0.275519452 0.4362391326 2.302373e-01
##  [538,] 0.424154946 0.3063341278 7.374710e-02
##  [539,] 0.417093250 0.1331148669 1.416116e-02
##  [540,] 0.392899701 0.1042386963 9.218388e-03
##  [541,] 0.291090430 0.4308138364 2.125348e-01
##  [542,] 0.386693968 0.3625255950 1.132892e-01
##  [543,] 0.291090430 0.4308138364 2.125348e-01
##  [544,] 0.407438488 0.3355375785 9.210835e-02
##  [545,] 0.386693968 0.3625255950 1.132892e-01
##  [546,] 0.204487093 0.0179374643 5.244873e-04
##  [547,] 0.211473264 0.4440938538 3.108657e-01
##  [548,] 0.426168977 0.1482326877 1.718640e-02
##  [549,] 0.416337988 0.3211750193 8.258786e-02
##  [550,] 0.340371253 0.0654560102 4.195898e-03
##  [551,] 0.417093250 0.1331148669 1.416116e-02
##  [552,] 0.243697761 0.4430868383 2.685375e-01
##  [553,] 0.397531973 0.3493462791 1.023338e-01
##  [554,] 0.236850055 0.0253767916 9.063140e-04
##  [555,] 0.275519452 0.4362391326 2.302373e-01
##  [556,] 0.275519452 0.4362391326 2.302373e-01
##  [557,] 0.204487093 0.0179374643 5.244873e-04
##  [558,] 0.416337988 0.3211750193 8.258786e-02
##  [559,] 0.243697761 0.4430868383 2.685375e-01
##  [560,] 0.377630828 0.0906313987 7.250512e-03
##  [561,] 0.386693968 0.3625255950 1.132892e-01
##  [562,] 0.442218287 0.1953987782 2.877966e-02
##  [563,] 0.375000000 0.3750000000 1.250000e-01
##  [564,] 0.392899701 0.1042386963 9.218388e-03
##  [565,] 0.335537578 0.4074384881 1.649156e-01
##  [566,] 0.065456010 0.3403712531 5.899768e-01
##  [567,] 0.426168977 0.1482326877 1.718640e-02
##  [568,] 0.444093854 0.2114732637 3.356718e-02
##  [569,] 0.340371253 0.0654560102 4.195898e-03
##  [570,] 0.444093854 0.2114732637 3.356718e-02
##  [571,] 0.444358195 0.2275981001 3.885821e-02
##  [572,] 0.335537578 0.4074384881 1.649156e-01
##  [573,] 0.426168977 0.1482326877 1.718640e-02
##  [574,] 0.417093250 0.1331148669 1.416116e-02
##  [575,] 0.243697761 0.4430868383 2.685375e-01
##  [576,] 0.444093854 0.2114732637 3.356718e-02
##  [577,] 0.444093854 0.2114732637 3.356718e-02
##  [578,] 0.392899701 0.1042386963 9.218388e-03
##  [579,] 0.321175019 0.4163379880 1.798991e-01
##  [580,] 0.131453291 0.0066840657 1.132892e-04
##  [581,] 0.444093854 0.2114732637 3.356718e-02
##  [582,] 0.340371253 0.0654560102 4.195898e-03
##  [583,] 0.406028666 0.1184250277 1.151354e-02
##  [584,] 0.340371253 0.0654560102 4.195898e-03
##  [585,] 0.436239133 0.2755194522 5.800410e-02
##  [586,] 0.340371253 0.0654560102 4.195898e-03
##  [587,] 0.386693968 0.3625255950 1.132892e-01
##  [588,] 0.291090430 0.4308138364 2.125348e-01
##  [589,] 0.442218287 0.1953987782 2.877966e-02
##  [590,] 0.090631399 0.3776308281 5.244873e-01
##  [591,] 0.133114867 0.4170932496 4.356307e-01
##  [592,] 0.442218287 0.1953987782 2.877966e-02
##  [593,] 0.417093250 0.1331148669 1.416116e-02
##  [594,] 0.046838810 0.0007678494 4.195898e-06
##  [595,] 0.362525595 0.3866939680 1.374912e-01
##  [596,] 0.443086838 0.2436977611 4.467792e-02
##  [597,] 0.118425028 0.4060286664 4.640328e-01
##  [598,] 0.433331375 0.1637029640 2.061445e-02
##  [599,] 0.417093250 0.1331148669 1.416116e-02
##  [600,] 0.424154946 0.3063341278 7.374710e-02
##  [601,] 0.397531973 0.3493462791 1.023338e-01
##  [602,] 0.291090430 0.4308138364 2.125348e-01
##  [603,] 0.417093250 0.1331148669 1.416116e-02
##  [604,] 0.275519452 0.4362391326 2.302373e-01
##  [605,] 0.397531973 0.3493462791 1.023338e-01
##  [606,] 0.416337988 0.3211750193 8.258786e-02
##  [607,] 0.424154946 0.3063341278 7.374710e-02
##  [608,] 0.266544426 0.0339238361 1.439193e-03
##  [609,] 0.416337988 0.3211750193 8.258786e-02
##  [610,] 0.275519452 0.4362391326 2.302373e-01
##  [611,] 0.397531973 0.3493462791 1.023338e-01
##  [612,] 0.444358195 0.2275981001 3.885821e-02
##  [613,] 0.386693968 0.3625255950 1.132892e-01
##  [614,] 0.436239133 0.2755194522 5.800410e-02
##  [615,] 0.291090430 0.4308138364 2.125348e-01
##  [616,] 0.195398778 0.4422182874 3.336033e-01
##  [617,] 0.444358195 0.2275981001 3.885821e-02
##  [618,] 0.377630828 0.0906313987 7.250512e-03
##  [619,] 0.375000000 0.3750000000 1.250000e-01
##  [620,] 0.417093250 0.1331148669 1.416116e-02
##  [621,] 0.392899701 0.1042386963 9.218388e-03
##  [622,] 0.291090430 0.4308138364 2.125348e-01
##  [623,] 0.438655970 0.1794501695 2.447048e-02
##  [624,] 0.417093250 0.1331148669 1.416116e-02
##  [625,] 0.386693968 0.3625255950 1.132892e-01
##  [626,] 0.211473264 0.4440938538 3.108657e-01
##  [627,] 0.340371253 0.0654560102 4.195898e-03
##  [628,] 0.360146521 0.0776786613 5.584740e-03
##  [629,] 0.406028666 0.1184250277 1.151354e-02
##  [630,] 0.417093250 0.1331148669 1.416116e-02
##  [631,] 0.443086838 0.2436977611 4.467792e-02
##  [632,] 0.436239133 0.2755194522 5.800410e-02
##  [633,] 0.444358195 0.2275981001 3.885821e-02
##  [634,] 0.424154946 0.3063341278 7.374710e-02
##  [635,] 0.430813836 0.2910904300 6.556091e-02
##  [636,] 0.424154946 0.3063341278 7.374710e-02
##  [637,] 0.360146521 0.0776786613 5.584740e-03
##  [638,] 0.397531973 0.3493462791 1.023338e-01
##  [639,] 0.407438488 0.3355375785 9.210835e-02
##  [640,] 0.335537578 0.4074384881 1.649156e-01
##  [641,] 0.444093854 0.2114732637 3.356718e-02
##  [642,] 0.436239133 0.2755194522 5.800410e-02
##  [643,] 0.275519452 0.4362391326 2.302373e-01
##  [644,] 0.360146521 0.0776786613 5.584740e-03
##  [645,] 0.417093250 0.1331148669 1.416116e-02
##  [646,] 0.417093250 0.1331148669 1.416116e-02
##  [647,] 0.440355309 0.2596967205 5.105149e-02
##  [648,] 0.424154946 0.3063341278 7.374710e-02
##  [649,] 0.416337988 0.3211750193 8.258786e-02
##  [650,] 0.243697761 0.4430868383 2.685375e-01
##  [651,] 0.360146521 0.0776786613 5.584740e-03
##  [652,] 0.436239133 0.2755194522 5.800410e-02
##  [653,] 0.397531973 0.3493462791 1.023338e-01
##  [654,] 0.377630828 0.0906313987 7.250512e-03
##  [655,] 0.444358195 0.2275981001 3.885821e-02
##  [656,] 0.375000000 0.3750000000 1.250000e-01
##  [657,] 0.424154946 0.3063341278 7.374710e-02
##  [658,] 0.306334128 0.4241549461 1.957638e-01
##  [659,] 0.436239133 0.2755194522 5.800410e-02
##  [660,] 0.444358195 0.2275981001 3.885821e-02
##  [661,] 0.377630828 0.0906313987 7.250512e-03
##  [662,] 0.417093250 0.1331148669 1.416116e-02
##  [663,] 0.444093854 0.2114732637 3.356718e-02
##  [664,] 0.335537578 0.4074384881 1.649156e-01
##  [665,] 0.306334128 0.4241549461 1.957638e-01
##  [666,] 0.179450170 0.4386559699 3.574234e-01
##  [667,] 0.259696720 0.4403553087 2.488965e-01
##  [668,] 0.406028666 0.1184250277 1.151354e-02
##  [669,] 0.443086838 0.2436977611 4.467792e-02
##  [670,] 0.375000000 0.3750000000 1.250000e-01
##  [671,] 0.306334128 0.4241549461 1.957638e-01
##  [672,] 0.386693968 0.3625255950 1.132892e-01
##  [673,] 0.407438488 0.3355375785 9.210835e-02
##  [674,] 0.377630828 0.0906313987 7.250512e-03
##  [675,] 0.318229499 0.0540389715 3.058810e-03
##  [676,] 0.291090430 0.4308138364 2.125348e-01
##  [677,] 0.406028666 0.1184250277 1.151354e-02
##  [678,] 0.375000000 0.3750000000 1.250000e-01
##  [679,] 0.362525595 0.3866939680 1.374912e-01
##  [680,] 0.362525595 0.3866939680 1.374912e-01
##  [681,] 0.424154946 0.3063341278 7.374710e-02
##  [682,] 0.259696720 0.4403553087 2.488965e-01
##  [683,] 0.043503071 0.2936457319 6.607029e-01
##  [684,] 0.204487093 0.0179374643 5.244873e-04
##  [685,] 0.392899701 0.1042386963 9.218388e-03
##  [686,] 0.407438488 0.3355375785 9.210835e-02
##  [687,] 0.291090430 0.4308138364 2.125348e-01
##  [688,] 0.424154946 0.3063341278 7.374710e-02
##  [689,] 0.424154946 0.3063341278 7.374710e-02
##  [690,] 0.406028666 0.1184250277 1.151354e-02
##  [691,] 0.211473264 0.4440938538 3.108657e-01
##  [692,] 0.386693968 0.3625255950 1.132892e-01
##  [693,] 0.306334128 0.4241549461 1.957638e-01
##  [694,] 0.360146521 0.0776786613 5.584740e-03
##  [695,] 0.433331375 0.1637029640 2.061445e-02
##  [696,] 0.266544426 0.0339238361 1.439193e-03
##  [697,] 0.349346279 0.3975319727 1.507880e-01
##  [698,] 0.417093250 0.1331148669 1.416116e-02
##  [699,] 0.227598100 0.4443581954 2.891855e-01
##  [700,] 0.179450170 0.4386559699 3.574234e-01
##  [701,] 0.340371253 0.0654560102 4.195898e-03
##  [702,] 0.335537578 0.4074384881 1.649156e-01
##  [703,] 0.360146521 0.0776786613 5.584740e-03
##  [704,] 0.426168977 0.1482326877 1.718640e-02
##  [705,] 0.266544426 0.0339238361 1.439193e-03
##  [706,] 0.118425028 0.4060286664 4.640328e-01
##  [707,] 0.430813836 0.2910904300 6.556091e-02
##  [708,] 0.416337988 0.3211750193 8.258786e-02
##  [709,] 0.433331375 0.1637029640 2.061445e-02
##  [710,] 0.375000000 0.3750000000 1.250000e-01
##  [711,] 0.211473264 0.4440938538 3.108657e-01
##  [712,] 0.291090430 0.4308138364 2.125348e-01
##  [713,] 0.406028666 0.1184250277 1.151354e-02
##  [714,] 0.321175019 0.4163379880 1.798991e-01
##  [715,] 0.259696720 0.4403553087 2.488965e-01
##  [716,] 0.349346279 0.3975319727 1.507880e-01
##  [717,] 0.275519452 0.4362391326 2.302373e-01
##  [718,] 0.377630828 0.0906313987 7.250512e-03
##  [719,] 0.131453291 0.0066840657 1.132892e-04
##  [720,] 0.211473264 0.4440938538 3.108657e-01
##  [721,] 0.211473264 0.4440938538 3.108657e-01
##  [722,] 0.386693968 0.3625255950 1.132892e-01
##  [723,] 0.444358195 0.2275981001 3.885821e-02
##  [724,] 0.406028666 0.1184250277 1.151354e-02
##  [725,] 0.349346279 0.3975319727 1.507880e-01
##  [726,] 0.424154946 0.3063341278 7.374710e-02
##  [727,] 0.407438488 0.3355375785 9.210835e-02
##  [728,] 0.236850055 0.0253767916 9.063140e-04
##  [729,] 0.442218287 0.1953987782 2.877966e-02
##  [730,] 0.043503071 0.2936457319 6.607029e-01
##  [731,] 0.362525595 0.3866939680 1.374912e-01
##  [732,] 0.318229499 0.0540389715 3.058810e-03
##  [733,] 0.440355309 0.2596967205 5.105149e-02
##  [734,] 0.090631399 0.0030210466 3.356718e-05
##  [735,] 0.375000000 0.3750000000 1.250000e-01
##  [736,] 0.266544426 0.0339238361 1.439193e-03
##  [737,] 0.321175019 0.4163379880 1.798991e-01
##  [738,] 0.416337988 0.3211750193 8.258786e-02
##  [739,] 0.406028666 0.1184250277 1.151354e-02
##  [740,] 0.397531973 0.3493462791 1.023338e-01
##  [741,] 0.293645732 0.0435030714 2.148300e-03
##  [742,] 0.392899701 0.1042386963 9.218388e-03
##  [743,] 0.406028666 0.1184250277 1.151354e-02
##  [744,] 0.362525595 0.3866939680 1.374912e-01
##  [745,] 0.375000000 0.3750000000 1.250000e-01
##  [746,] 0.266544426 0.0339238361 1.439193e-03
##  [747,] 0.211473264 0.4440938538 3.108657e-01
##  [748,] 0.179450170 0.4386559699 3.574234e-01
##  [749,] 0.163702964 0.4333313752 3.823512e-01
##  [750,] 0.360146521 0.0776786613 5.584740e-03
##  [751,] 0.349346279 0.3975319727 1.507880e-01
##  [752,] 0.340371253 0.0654560102 4.195898e-03
##  [753,] 0.438655970 0.1794501695 2.447048e-02
##  [754,] 0.340371253 0.0654560102 4.195898e-03
##  [755,] 0.444093854 0.2114732637 3.356718e-02
##  [756,] 0.433331375 0.1637029640 2.061445e-02
##  [757,] 0.407438488 0.3355375785 9.210835e-02
##  [758,] 0.442218287 0.1953987782 2.877966e-02
##  [759,] 0.227598100 0.4443581954 2.891855e-01
##  [760,] 0.349346279 0.3975319727 1.507880e-01
##  [761,] 0.293645732 0.0435030714 2.148300e-03
##  [762,] 0.406028666 0.1184250277 1.151354e-02
##  [763,] 0.204487093 0.0179374643 5.244873e-04
##  [764,] 0.362525595 0.3866939680 1.374912e-01
##  [765,] 0.266544426 0.0339238361 1.439193e-03
##  [766,] 0.430813836 0.2910904300 6.556091e-02
##  [767,] 0.438655970 0.1794501695 2.447048e-02
##  [768,] 0.362525595 0.3866939680 1.374912e-01
##  [769,] 0.426168977 0.1482326877 1.718640e-02
##  [770,] 0.426168977 0.1482326877 1.718640e-02
##  [771,] 0.444358195 0.2275981001 3.885821e-02
##  [772,] 0.443086838 0.2436977611 4.467792e-02
##  [773,] 0.406028666 0.1184250277 1.151354e-02
##  [774,] 0.163702964 0.4333313752 3.823512e-01
##  [775,] 0.104238696 0.3928997013 4.936432e-01
##  [776,] 0.444358195 0.2275981001 3.885821e-02
##  [777,] 0.392899701 0.1042386963 9.218388e-03
##  [778,] 0.195398778 0.4422182874 3.336033e-01
##  [779,] 0.131453291 0.0066840657 1.132892e-04
##  [780,] 0.321175019 0.4163379880 1.798991e-01
##  [781,] 0.436239133 0.2755194522 5.800410e-02
##  [782,] 0.306334128 0.4241549461 1.957638e-01
##  [783,] 0.438655970 0.1794501695 2.447048e-02
##  [784,] 0.211473264 0.4440938538 3.108657e-01
##  [785,] 0.436239133 0.2755194522 5.800410e-02
##  [786,] 0.440355309 0.2596967205 5.105149e-02
##  [787,] 0.426168977 0.1482326877 1.718640e-02
##  [788,] 0.169380014 0.0116813803 2.685375e-04
##  [789,] 0.397531973 0.3493462791 1.023338e-01
##  [790,] 0.227598100 0.4443581954 2.891855e-01
##  [791,] 0.360146521 0.0776786613 5.584740e-03
##  [792,] 0.406028666 0.1184250277 1.151354e-02
##  [793,] 0.375000000 0.3750000000 1.250000e-01
##  [794,] 0.417093250 0.1331148669 1.416116e-02
##  [795,] 0.349346279 0.3975319727 1.507880e-01
##  [796,] 0.442218287 0.1953987782 2.877966e-02
##  [797,] 0.163702964 0.4333313752 3.823512e-01
##  [798,] 0.443086838 0.2436977611 4.467792e-02
##  [799,] 0.416337988 0.3211750193 8.258786e-02
##  [800,] 0.133114867 0.4170932496 4.356307e-01
##  [801,] 0.362525595 0.3866939680 1.374912e-01
##  [802,] 0.386693968 0.3625255950 1.132892e-01
##  [803,] 0.377630828 0.0906313987 7.250512e-03
##  [804,] 0.442218287 0.1953987782 2.877966e-02
##  [805,] 0.349346279 0.3975319727 1.507880e-01
##  [806,] 0.291090430 0.4308138364 2.125348e-01
##  [807,] 0.417093250 0.1331148669 1.416116e-02
##  [808,] 0.426168977 0.1482326877 1.718640e-02
##  [809,] 0.375000000 0.3750000000 1.250000e-01
##  [810,] 0.179450170 0.4386559699 3.574234e-01
##  [811,] 0.392899701 0.1042386963 9.218388e-03
##  [812,] 0.430813836 0.2910904300 6.556091e-02
##  [813,] 0.430813836 0.2910904300 6.556091e-02
##  [814,] 0.386693968 0.3625255950 1.132892e-01
##  [815,] 0.386693968 0.3625255950 1.132892e-01
##  [816,] 0.360146521 0.0776786613 5.584740e-03
##  [817,] 0.335537578 0.4074384881 1.649156e-01
##  [818,] 0.443086838 0.2436977611 4.467792e-02
##  [819,] 0.306334128 0.4241549461 1.957638e-01
##  [820,] 0.444093854 0.2114732637 3.356718e-02
##  [821,] 0.340371253 0.0654560102 4.195898e-03
##  [822,] 0.417093250 0.1331148669 1.416116e-02
##  [823,] 0.424154946 0.3063341278 7.374710e-02
##  [824,] 0.440355309 0.2596967205 5.105149e-02
##  [825,] 0.392899701 0.1042386963 9.218388e-03
##  [826,] 0.236850055 0.0253767916 9.063140e-04
##  [827,] 0.426168977 0.1482326877 1.718640e-02
##  [828,] 0.340371253 0.0654560102 4.195898e-03
##  [829,] 0.377630828 0.0906313987 7.250512e-03
##  [830,] 0.416337988 0.3211750193 8.258786e-02
##  [831,] 0.433331375 0.1637029640 2.061445e-02
##  [832,] 0.397531973 0.3493462791 1.023338e-01
##  [833,] 0.054038972 0.3182294988 6.246727e-01
##  [834,] 0.444358195 0.2275981001 3.885821e-02
##  [835,] 0.440355309 0.2596967205 5.105149e-02
##  [836,] 0.090631399 0.0030210466 3.356718e-05
##  [837,] 0.426168977 0.1482326877 1.718640e-02
##  [838,] 0.293645732 0.0435030714 2.148300e-03
##  [839,] 0.349346279 0.3975319727 1.507880e-01
##  [840,] 0.266544426 0.0339238361 1.439193e-03
##  [841,] 0.442218287 0.1953987782 2.877966e-02
##  [842,] 0.291090430 0.4308138364 2.125348e-01
##  [843,] 0.444358195 0.2275981001 3.885821e-02
##  [844,] 0.407438488 0.3355375785 9.210835e-02
##  [845,] 0.386693968 0.3625255950 1.132892e-01
##  [846,] 0.306334128 0.4241549461 1.957638e-01
##  [847,] 0.386693968 0.3625255950 1.132892e-01
##  [848,] 0.397531973 0.3493462791 1.023338e-01
##  [849,] 0.090631399 0.0030210466 3.356718e-05
##  [850,] 0.442218287 0.1953987782 2.877966e-02
##  [851,] 0.407438488 0.3355375785 9.210835e-02
##  [852,] 0.306334128 0.4241549461 1.957638e-01
##  [853,] 0.349346279 0.3975319727 1.507880e-01
##  [854,] 0.406028666 0.1184250277 1.151354e-02
##  [855,] 0.433331375 0.1637029640 2.061445e-02
##  [856,] 0.179450170 0.4386559699 3.574234e-01
##  [857,] 0.397531973 0.3493462791 1.023338e-01
##  [858,] 0.340371253 0.0654560102 4.195898e-03
##  [859,] 0.195398778 0.4422182874 3.336033e-01
##  [860,] 0.293645732 0.0435030714 2.148300e-03
##  [861,] 0.436239133 0.2755194522 5.800410e-02
##  [862,] 0.392899701 0.1042386963 9.218388e-03
##  [863,] 0.424154946 0.3063341278 7.374710e-02
##  [864,] 0.407438488 0.3355375785 9.210835e-02
##  [865,] 0.306334128 0.4241549461 1.957638e-01
##  [866,] 0.443086838 0.2436977611 4.467792e-02
##  [867,] 0.444093854 0.2114732637 3.356718e-02
##  [868,] 0.430813836 0.2910904300 6.556091e-02
##  [869,] 0.377630828 0.0906313987 7.250512e-03
##  [870,] 0.243697761 0.4430868383 2.685375e-01
##  [871,] 0.416337988 0.3211750193 8.258786e-02
##  [872,] 0.397531973 0.3493462791 1.023338e-01
##  [873,] 0.397531973 0.3493462791 1.023338e-01
##  [874,] 0.227598100 0.4443581954 2.891855e-01
##  [875,] 0.443086838 0.2436977611 4.467792e-02
##  [876,] 0.436239133 0.2755194522 5.800410e-02
##  [877,] 0.360146521 0.0776786613 5.584740e-03
##  [878,] 0.243697761 0.4430868383 2.685375e-01
##  [879,] 0.433331375 0.1637029640 2.061445e-02
##  [880,] 0.386693968 0.3625255950 1.132892e-01
##  [881,] 0.318229499 0.0540389715 3.058810e-03
##  [882,] 0.443086838 0.2436977611 4.467792e-02
##  [883,] 0.426168977 0.1482326877 1.718640e-02
##  [884,] 0.090631399 0.0030210466 3.356718e-05
##  [885,] 0.362525595 0.3866939680 1.374912e-01
##  [886,] 0.436239133 0.2755194522 5.800410e-02
##  [887,] 0.416337988 0.3211750193 8.258786e-02
##  [888,] 0.227598100 0.4443581954 2.891855e-01
##  [889,] 0.104238696 0.3928997013 4.936432e-01
##  [890,] 0.293645732 0.0435030714 2.148300e-03
##  [891,] 0.426168977 0.1482326877 1.718640e-02
##  [892,] 0.424154946 0.3063341278 7.374710e-02
##  [893,] 0.321175019 0.4163379880 1.798991e-01
##  [894,] 0.306334128 0.4241549461 1.957638e-01
##  [895,] 0.291090430 0.4308138364 2.125348e-01
##  [896,] 0.377630828 0.0906313987 7.250512e-03
##  [897,] 0.386693968 0.3625255950 1.132892e-01
##  [898,] 0.386693968 0.3625255950 1.132892e-01
##  [899,] 0.377630828 0.0906313987 7.250512e-03
##  [900,] 0.266544426 0.0339238361 1.439193e-03
##  [901,] 0.227598100 0.4443581954 2.891855e-01
##  [902,] 0.444093854 0.2114732637 3.356718e-02
##  [903,] 0.443086838 0.2436977611 4.467792e-02
##  [904,] 0.438655970 0.1794501695 2.447048e-02
##  [905,] 0.340371253 0.0654560102 4.195898e-03
##  [906,] 0.426168977 0.1482326877 1.718640e-02
##  [907,] 0.444358195 0.2275981001 3.885821e-02
##  [908,] 0.340371253 0.0654560102 4.195898e-03
##  [909,] 0.318229499 0.0540389715 3.058810e-03
##  [910,] 0.426168977 0.1482326877 1.718640e-02
##  [911,] 0.444093854 0.2114732637 3.356718e-02
##  [912,] 0.349346279 0.3975319727 1.507880e-01
##  [913,] 0.436239133 0.2755194522 5.800410e-02
##  [914,] 0.406028666 0.1184250277 1.151354e-02
##  [915,] 0.318229499 0.0540389715 3.058810e-03
##  [916,] 0.349346279 0.3975319727 1.507880e-01
##  [917,] 0.266544426 0.0339238361 1.439193e-03
##  [918,] 0.211473264 0.4440938538 3.108657e-01
##  [919,] 0.179450170 0.4386559699 3.574234e-01
##  [920,] 0.321175019 0.4163379880 1.798991e-01
##  [921,] 0.444358195 0.2275981001 3.885821e-02
##  [922,] 0.204487093 0.0179374643 5.244873e-04
##  [923,] 0.397531973 0.3493462791 1.023338e-01
##  [924,] 0.406028666 0.1184250277 1.151354e-02
##  [925,] 0.259696720 0.4403553087 2.488965e-01
##  [926,] 0.243697761 0.4430868383 2.685375e-01
##  [927,] 0.397531973 0.3493462791 1.023338e-01
##  [928,] 0.440355309 0.2596967205 5.105149e-02
##  [929,] 0.318229499 0.0540389715 3.058810e-03
##  [930,] 0.046838810 0.0007678494 4.195898e-06
##  [931,] 0.424154946 0.3063341278 7.374710e-02
##  [932,] 0.406028666 0.1184250277 1.151354e-02
##  [933,] 0.392899701 0.1042386963 9.218388e-03
##  [934,] 0.362525595 0.3866939680 1.374912e-01
##  [935,] 0.335537578 0.4074384881 1.649156e-01
##  [936,] 0.417093250 0.1331148669 1.416116e-02
##  [937,] 0.360146521 0.0776786613 5.584740e-03
##  [938,] 0.426168977 0.1482326877 1.718640e-02
##  [939,] 0.169380014 0.0116813803 2.685375e-04
##  [940,] 0.436239133 0.2755194522 5.800410e-02
##  [941,] 0.424154946 0.3063341278 7.374710e-02
##  [942,] 0.416337988 0.3211750193 8.258786e-02
##  [943,] 0.407438488 0.3355375785 9.210835e-02
##  [944,] 0.227598100 0.4443581954 2.891855e-01
##  [945,] 0.335537578 0.4074384881 1.649156e-01
##  [946,] 0.416337988 0.3211750193 8.258786e-02
##  [947,] 0.321175019 0.4163379880 1.798991e-01
##  [948,] 0.340371253 0.0654560102 4.195898e-03
##  [949,] 0.335537578 0.4074384881 1.649156e-01
##  [950,] 0.440355309 0.2596967205 5.105149e-02
##  [951,] 0.424154946 0.3063341278 7.374710e-02
##  [952,] 0.386693968 0.3625255950 1.132892e-01
##  [953,] 0.397531973 0.3493462791 1.023338e-01
##  [954,] 0.392899701 0.1042386963 9.218388e-03
##  [955,] 0.340371253 0.0654560102 4.195898e-03
##  [956,] 0.416337988 0.3211750193 8.258786e-02
##  [957,] 0.275519452 0.4362391326 2.302373e-01
##  [958,] 0.397531973 0.3493462791 1.023338e-01
##  [959,] 0.440355309 0.2596967205 5.105149e-02
##  [960,] 0.375000000 0.3750000000 1.250000e-01
##  [961,] 0.386693968 0.3625255950 1.132892e-01
##  [962,] 0.259696720 0.4403553087 2.488965e-01
##  [963,] 0.416337988 0.3211750193 8.258786e-02
##  [964,] 0.335537578 0.4074384881 1.649156e-01
##  [965,] 0.349346279 0.3975319727 1.507880e-01
##  [966,] 0.407438488 0.3355375785 9.210835e-02
##  [967,] 0.416337988 0.3211750193 8.258786e-02
##  [968,] 0.443086838 0.2436977611 4.467792e-02
##  [969,] 0.386693968 0.3625255950 1.132892e-01
##  [970,] 0.397531973 0.3493462791 1.023338e-01
##  [971,] 0.416337988 0.3211750193 8.258786e-02
##  [972,] 0.375000000 0.3750000000 1.250000e-01
##  [973,] 0.259696720 0.4403553087 2.488965e-01
##  [974,] 0.006684066 0.1314532913 8.617494e-01
##  [975,] 0.386693968 0.3625255950 1.132892e-01
##  [976,] 0.275519452 0.4362391326 2.302373e-01
##  [977,] 0.444358195 0.2275981001 3.885821e-02
##  [978,] 0.424154946 0.3063341278 7.374710e-02
##  [979,] 0.375000000 0.3750000000 1.250000e-01
##  [980,] 0.243697761 0.4430868383 2.685375e-01
##  [981,] 0.407438488 0.3355375785 9.210835e-02
##  [982,] 0.293645732 0.0435030714 2.148300e-03
##  [983,] 0.195398778 0.4422182874 3.336033e-01
##  [984,] 0.179450170 0.4386559699 3.574234e-01
##  [985,] 0.397531973 0.3493462791 1.023338e-01
##  [986,] 0.443086838 0.2436977611 4.467792e-02
##  [987,] 0.433331375 0.1637029640 2.061445e-02
##  [988,] 0.195398778 0.4422182874 3.336033e-01
##  [989,] 0.416337988 0.3211750193 8.258786e-02
##  [990,] 0.318229499 0.0540389715 3.058810e-03
##  [991,] 0.360146521 0.0776786613 5.584740e-03
##  [992,] 0.362525595 0.3866939680 1.374912e-01
##  [993,] 0.266544426 0.0339238361 1.439193e-03
##  [994,] 0.440355309 0.2596967205 5.105149e-02
##  [995,] 0.444093854 0.2114732637 3.356718e-02
##  [996,] 0.438655970 0.1794501695 2.447048e-02
##  [997,] 0.204487093 0.0179374643 5.244873e-04
##  [998,] 0.340371253 0.0654560102 4.195898e-03
##  [999,] 0.436239133 0.2755194522 5.800410e-02
## [1000,] 0.442218287 0.1953987782 2.877966e-02
## [1001,] 0.243697761 0.4430868383 2.685375e-01
## [1002,] 0.148232688 0.4261689772 4.084119e-01
## [1003,] 0.416337988 0.3211750193 8.258786e-02
## [1004,] 0.443086838 0.2436977611 4.467792e-02
## [1005,] 0.291090430 0.4308138364 2.125348e-01
## [1006,] 0.407438488 0.3355375785 9.210835e-02
## [1007,] 0.291090430 0.4308138364 2.125348e-01
## [1008,] 0.321175019 0.4163379880 1.798991e-01
## [1009,] 0.417093250 0.1331148669 1.416116e-02
## [1010,] 0.306334128 0.4241549461 1.957638e-01
## [1011,] 0.406028666 0.1184250277 1.151354e-02
## [1012,] 0.306334128 0.4241549461 1.957638e-01
## [1013,] 0.444093854 0.2114732637 3.356718e-02
## [1014,] 0.392899701 0.1042386963 9.218388e-03
## [1015,] 0.440355309 0.2596967205 5.105149e-02
## [1016,] 0.416337988 0.3211750193 8.258786e-02
## [1017,] 0.375000000 0.3750000000 1.250000e-01
## [1018,] 0.362525595 0.3866939680 1.374912e-01
## [1019,] 0.443086838 0.2436977611 4.467792e-02
## [1020,] 0.360146521 0.0776786613 5.584740e-03
## [1021,] 0.406028666 0.1184250277 1.151354e-02
## [1022,] 0.349346279 0.3975319727 1.507880e-01
## [1023,] 0.436239133 0.2755194522 5.800410e-02
## [1024,] 0.227598100 0.4443581954 2.891855e-01
## [1025,] 0.392899701 0.1042386963 9.218388e-03
## [1026,] 0.360146521 0.0776786613 5.584740e-03
## [1027,] 0.293645732 0.0435030714 2.148300e-03
## [1028,] 0.362525595 0.3866939680 1.374912e-01
## [1029,] 0.179450170 0.4386559699 3.574234e-01
## [1030,] 0.433331375 0.1637029640 2.061445e-02
## [1031,] 0.169380014 0.0116813803 2.685375e-04
## [1032,] 0.291090430 0.4308138364 2.125348e-01
## [1033,] 0.163702964 0.4333313752 3.823512e-01
## [1034,] 0.430813836 0.2910904300 6.556091e-02
## [1035,] 0.375000000 0.3750000000 1.250000e-01
## [1036,] 0.438655970 0.1794501695 2.447048e-02
## [1037,] 0.293645732 0.0435030714 2.148300e-03
## [1038,] 0.407438488 0.3355375785 9.210835e-02
## [1039,] 0.169380014 0.0116813803 2.685375e-04
## [1040,] 0.163702964 0.4333313752 3.823512e-01
## [1041,] 0.424154946 0.3063341278 7.374710e-02
## [1042,] 0.349346279 0.3975319727 1.507880e-01
## [1043,] 0.407438488 0.3355375785 9.210835e-02
## [1044,] 0.430813836 0.2910904300 6.556091e-02
## [1045,] 0.443086838 0.2436977611 4.467792e-02
## [1046,] 0.440355309 0.2596967205 5.105149e-02
## [1047,] 0.349346279 0.3975319727 1.507880e-01
## [1048,] 0.426168977 0.1482326877 1.718640e-02
## [1049,] 0.416337988 0.3211750193 8.258786e-02
## [1050,] 0.433331375 0.1637029640 2.061445e-02
## [1051,] 0.417093250 0.1331148669 1.416116e-02
## [1052,] 0.407438488 0.3355375785 9.210835e-02
## [1053,] 0.424154946 0.3063341278 7.374710e-02
## [1054,] 0.362525595 0.3866939680 1.374912e-01
## [1055,] 0.291090430 0.4308138364 2.125348e-01
## [1056,] 0.375000000 0.3750000000 1.250000e-01
## [1057,] 0.397531973 0.3493462791 1.023338e-01
## [1058,] 0.443086838 0.2436977611 4.467792e-02
## [1059,] 0.131453291 0.0066840657 1.132892e-04
## [1060,] 0.211473264 0.4440938538 3.108657e-01
## [1061,] 0.275519452 0.4362391326 2.302373e-01
## [1062,] 0.195398778 0.4422182874 3.336033e-01
## [1063,] 0.424154946 0.3063341278 7.374710e-02
## [1064,] 0.430813836 0.2910904300 6.556091e-02
## [1065,] 0.360146521 0.0776786613 5.584740e-03
## [1066,] 0.444093854 0.2114732637 3.356718e-02
## [1067,] 0.293645732 0.0435030714 2.148300e-03
## [1068,] 0.340371253 0.0654560102 4.195898e-03
## [1069,] 0.416337988 0.3211750193 8.258786e-02
## [1070,] 0.444358195 0.2275981001 3.885821e-02
## [1071,] 0.417093250 0.1331148669 1.416116e-02
## [1072,] 0.424154946 0.3063341278 7.374710e-02
## [1073,] 0.386693968 0.3625255950 1.132892e-01
## [1074,] 0.416337988 0.3211750193 8.258786e-02
## [1075,] 0.275519452 0.4362391326 2.302373e-01
## [1076,] 0.443086838 0.2436977611 4.467792e-02
## [1077,] 0.054038972 0.3182294988 6.246727e-01
## [1078,] 0.377630828 0.0906313987 7.250512e-03
## [1079,] 0.416337988 0.3211750193 8.258786e-02
## [1080,] 0.440355309 0.2596967205 5.105149e-02
## [1081,] 0.443086838 0.2436977611 4.467792e-02
## [1082,] 0.227598100 0.4443581954 2.891855e-01
## [1083,] 0.444093854 0.2114732637 3.356718e-02
## [1084,] 0.293645732 0.0435030714 2.148300e-03
## [1085,] 0.321175019 0.4163379880 1.798991e-01
## [1086,] 0.407438488 0.3355375785 9.210835e-02
## [1087,] 0.436239133 0.2755194522 5.800410e-02
## [1088,] 0.377630828 0.0906313987 7.250512e-03
## [1089,] 0.426168977 0.1482326877 1.718640e-02
## [1090,] 0.335537578 0.4074384881 1.649156e-01
## [1091,] 0.335537578 0.4074384881 1.649156e-01
## [1092,] 0.306334128 0.4241549461 1.957638e-01
## [1093,] 0.397531973 0.3493462791 1.023338e-01
## [1094,] 0.131453291 0.0066840657 1.132892e-04
## [1095,] 0.043503071 0.2936457319 6.607029e-01
## [1096,] 0.444093854 0.2114732637 3.356718e-02
## [1097,] 0.321175019 0.4163379880 1.798991e-01
## [1098,] 0.433331375 0.1637029640 2.061445e-02
## [1099,] 0.211473264 0.4440938538 3.108657e-01
## [1100,] 0.444358195 0.2275981001 3.885821e-02
## [1101,] 0.195398778 0.4422182874 3.336033e-01
## [1102,] 0.148232688 0.4261689772 4.084119e-01
## [1103,] 0.407438488 0.3355375785 9.210835e-02
## [1104,] 0.266544426 0.0339238361 1.439193e-03
## [1105,] 0.000000000 0.0000000000 1.000000e+00
## [1106,] 0.349346279 0.3975319727 1.507880e-01
## [1107,] 0.243697761 0.4430868383 2.685375e-01
## [1108,] 0.335537578 0.4074384881 1.649156e-01
## [1109,] 0.416337988 0.3211750193 8.258786e-02
## [1110,] 0.392899701 0.1042386963 9.218388e-03
## [1111,] 0.375000000 0.3750000000 1.250000e-01
## [1112,] 0.397531973 0.3493462791 1.023338e-01
## [1113,] 0.444358195 0.2275981001 3.885821e-02
## [1114,] 0.321175019 0.4163379880 1.798991e-01
## [1115,] 0.442218287 0.1953987782 2.877966e-02
## [1116,] 0.335537578 0.4074384881 1.649156e-01
## [1117,] 0.444358195 0.2275981001 3.885821e-02
## [1118,] 0.163702964 0.4333313752 3.823512e-01
## [1119,] 0.204487093 0.0179374643 5.244873e-04
## [1120,] 0.179450170 0.4386559699 3.574234e-01
## [1121,] 0.430813836 0.2910904300 6.556091e-02
## [1122,] 0.426168977 0.1482326877 1.718640e-02
## [1123,] 0.444093854 0.2114732637 3.356718e-02
## [1124,] 0.266544426 0.0339238361 1.439193e-03
## [1125,] 0.377630828 0.0906313987 7.250512e-03
## [1126,] 0.417093250 0.1331148669 1.416116e-02
## [1127,] 0.360146521 0.0776786613 5.584740e-03
## [1128,] 0.406028666 0.1184250277 1.151354e-02
## [1129,] 0.306334128 0.4241549461 1.957638e-01
## [1130,] 0.236850055 0.0253767916 9.063140e-04
## [1131,] 0.377630828 0.0906313987 7.250512e-03
## [1132,] 0.397531973 0.3493462791 1.023338e-01
## [1133,] 0.424154946 0.3063341278 7.374710e-02
## [1134,] 0.440355309 0.2596967205 5.105149e-02
## [1135,] 0.306334128 0.4241549461 1.957638e-01
## [1136,] 0.266544426 0.0339238361 1.439193e-03
## [1137,] 0.375000000 0.3750000000 1.250000e-01
## [1138,] 0.433331375 0.1637029640 2.061445e-02
## [1139,] 0.118425028 0.4060286664 4.640328e-01
## [1140,] 0.259696720 0.4403553087 2.488965e-01
## [1141,] 0.397531973 0.3493462791 1.023338e-01
## [1142,] 0.275519452 0.4362391326 2.302373e-01
## [1143,] 0.426168977 0.1482326877 1.718640e-02
## [1144,] 0.204487093 0.0179374643 5.244873e-04
## [1145,] 0.430813836 0.2910904300 6.556091e-02
## [1146,] 0.438655970 0.1794501695 2.447048e-02
## [1147,] 0.169380014 0.0116813803 2.685375e-04
## [1148,] 0.362525595 0.3866939680 1.374912e-01
## [1149,] 0.243697761 0.4430868383 2.685375e-01
## [1150,] 0.424154946 0.3063341278 7.374710e-02
## [1151,] 0.362525595 0.3866939680 1.374912e-01
## [1152,] 0.291090430 0.4308138364 2.125348e-01
## [1153,] 0.406028666 0.1184250277 1.151354e-02
## [1154,] 0.362525595 0.3866939680 1.374912e-01
## [1155,] 0.236850055 0.0253767916 9.063140e-04
## [1156,] 0.321175019 0.4163379880 1.798991e-01
## [1157,] 0.266544426 0.0339238361 1.439193e-03
## [1158,] 0.259696720 0.4403553087 2.488965e-01
## [1159,] 0.430813836 0.2910904300 6.556091e-02
## [1160,] 0.443086838 0.2436977611 4.467792e-02
## [1161,] 0.444358195 0.2275981001 3.885821e-02
## [1162,] 0.406028666 0.1184250277 1.151354e-02
## [1163,] 0.386693968 0.3625255950 1.132892e-01
## [1164,] 0.433331375 0.1637029640 2.061445e-02
## [1165,] 0.335537578 0.4074384881 1.649156e-01
## [1166,] 0.362525595 0.3866939680 1.374912e-01
## [1167,] 0.433331375 0.1637029640 2.061445e-02
## [1168,] 0.318229499 0.0540389715 3.058810e-03
## [1169,] 0.259696720 0.4403553087 2.488965e-01
## [1170,] 0.386693968 0.3625255950 1.132892e-01
## [1171,] 0.440355309 0.2596967205 5.105149e-02
## [1172,] 0.227598100 0.4443581954 2.891855e-01
## [1173,] 0.291090430 0.4308138364 2.125348e-01
## [1174,] 0.426168977 0.1482326877 1.718640e-02
## [1175,] 0.430813836 0.2910904300 6.556091e-02
## [1176,] 0.430813836 0.2910904300 6.556091e-02
## [1177,] 0.417093250 0.1331148669 1.416116e-02
## [1178,] 0.131453291 0.0066840657 1.132892e-04
## [1179,] 0.306334128 0.4241549461 1.957638e-01
## [1180,] 0.306334128 0.4241549461 1.957638e-01
## [1181,] 0.433331375 0.1637029640 2.061445e-02
## [1182,] 0.204487093 0.0179374643 5.244873e-04
## [1183,] 0.195398778 0.4422182874 3.336033e-01
## [1184,] 0.349346279 0.3975319727 1.507880e-01
## [1185,] 0.090631399 0.0030210466 3.356718e-05
## [1186,] 0.349346279 0.3975319727 1.507880e-01
## [1187,] 0.133114867 0.4170932496 4.356307e-01
## [1188,] 0.442218287 0.1953987782 2.877966e-02
## [1189,] 0.236850055 0.0253767916 9.063140e-04
## [1190,] 0.438655970 0.1794501695 2.447048e-02
## [1191,] 0.417093250 0.1331148669 1.416116e-02
## [1192,] 0.438655970 0.1794501695 2.447048e-02
## [1193,] 0.406028666 0.1184250277 1.151354e-02
## [1194,] 0.416337988 0.3211750193 8.258786e-02
## [1195,] 0.417093250 0.1331148669 1.416116e-02
## [1196,] 0.397531973 0.3493462791 1.023338e-01
## [1197,] 0.442218287 0.1953987782 2.877966e-02
## [1198,] 0.259696720 0.4403553087 2.488965e-01
## [1199,] 0.397531973 0.3493462791 1.023338e-01
## [1200,] 0.360146521 0.0776786613 5.584740e-03
## [1201,] 0.442218287 0.1953987782 2.877966e-02
## [1202,] 0.259696720 0.4403553087 2.488965e-01
## [1203,] 0.444358195 0.2275981001 3.885821e-02
## [1204,] 0.227598100 0.4443581954 2.891855e-01
## [1205,] 0.392899701 0.1042386963 9.218388e-03
## [1206,] 0.293645732 0.0435030714 2.148300e-03
## [1207,] 0.444093854 0.2114732637 3.356718e-02
## [1208,] 0.349346279 0.3975319727 1.507880e-01
## [1209,] 0.406028666 0.1184250277 1.151354e-02
## [1210,] 0.375000000 0.3750000000 1.250000e-01
## [1211,] 0.443086838 0.2436977611 4.467792e-02
## [1212,] 0.211473264 0.4440938538 3.108657e-01
## [1213,] 0.377630828 0.0906313987 7.250512e-03
## [1214,] 0.440355309 0.2596967205 5.105149e-02
## [1215,] 0.406028666 0.1184250277 1.151354e-02
## [1216,] 0.440355309 0.2596967205 5.105149e-02
## [1217,] 0.321175019 0.4163379880 1.798991e-01
## [1218,] 0.433331375 0.1637029640 2.061445e-02
## [1219,] 0.430813836 0.2910904300 6.556091e-02
## [1220,] 0.362525595 0.3866939680 1.374912e-01
## [1221,] 0.046838810 0.0007678494 4.195898e-06
## [1222,] 0.321175019 0.4163379880 1.798991e-01
## [1223,] 0.169380014 0.0116813803 2.685375e-04
## [1224,] 0.375000000 0.3750000000 1.250000e-01
## [1225,] 0.417093250 0.1331148669 1.416116e-02
## [1226,] 0.392899701 0.1042386963 9.218388e-03
## [1227,] 0.430813836 0.2910904300 6.556091e-02
## [1228,] 0.443086838 0.2436977611 4.467792e-02
## [1229,] 0.386693968 0.3625255950 1.132892e-01
## [1230,] 0.407438488 0.3355375785 9.210835e-02
## [1231,] 0.243697761 0.4430868383 2.685375e-01
## [1232,] 0.362525595 0.3866939680 1.374912e-01
## [1233,] 0.444093854 0.2114732637 3.356718e-02
## [1234,] 0.417093250 0.1331148669 1.416116e-02
## [1235,] 0.335537578 0.4074384881 1.649156e-01
## [1236,] 0.321175019 0.4163379880 1.798991e-01
## [1237,] 0.442218287 0.1953987782 2.877966e-02
## [1238,] 0.306334128 0.4241549461 1.957638e-01
## [1239,] 0.306334128 0.4241549461 1.957638e-01
## [1240,] 0.266544426 0.0339238361 1.439193e-03
## [1241,] 0.433331375 0.1637029640 2.061445e-02
## [1242,] 0.360146521 0.0776786613 5.584740e-03
## [1243,] 0.430813836 0.2910904300 6.556091e-02
## [1244,] 0.291090430 0.4308138364 2.125348e-01
## [1245,] 0.386693968 0.3625255950 1.132892e-01
## [1246,] 0.436239133 0.2755194522 5.800410e-02
## [1247,] 0.430813836 0.2910904300 6.556091e-02
## [1248,] 0.406028666 0.1184250277 1.151354e-02
## [1249,] 0.090631399 0.0030210466 3.356718e-05
## [1250,] 0.430813836 0.2910904300 6.556091e-02
## [1251,] 0.243697761 0.4430868383 2.685375e-01
## [1252,] 0.444093854 0.2114732637 3.356718e-02
## [1253,] 0.204487093 0.0179374643 5.244873e-04
## [1254,] 0.306334128 0.4241549461 1.957638e-01
## [1255,] 0.118425028 0.4060286664 4.640328e-01
## [1256,] 0.397531973 0.3493462791 1.023338e-01
## [1257,] 0.444358195 0.2275981001 3.885821e-02
## [1258,] 0.433331375 0.1637029640 2.061445e-02
## [1259,] 0.443086838 0.2436977611 4.467792e-02
## [1260,] 0.443086838 0.2436977611 4.467792e-02
## [1261,] 0.433331375 0.1637029640 2.061445e-02
## [1262,] 0.293645732 0.0435030714 2.148300e-03
## [1263,] 0.204487093 0.0179374643 5.244873e-04
## [1264,] 0.195398778 0.4422182874 3.336033e-01
## [1265,] 0.236850055 0.0253767916 9.063140e-04
## [1266,] 0.362525595 0.3866939680 1.374912e-01
## [1267,] 0.169380014 0.0116813803 2.685375e-04
## [1268,] 0.179450170 0.4386559699 3.574234e-01
## [1269,] 0.440355309 0.2596967205 5.105149e-02
## [1270,] 0.306334128 0.4241549461 1.957638e-01
## [1271,] 0.360146521 0.0776786613 5.584740e-03
## [1272,] 0.444358195 0.2275981001 3.885821e-02
## [1273,] 0.054038972 0.3182294988 6.246727e-01
## [1274,] 0.169380014 0.0116813803 2.685375e-04
## [1275,] 0.386693968 0.3625255950 1.132892e-01
## [1276,] 0.433331375 0.1637029640 2.061445e-02
## [1277,] 0.407438488 0.3355375785 9.210835e-02
## [1278,] 0.291090430 0.4308138364 2.125348e-01
## [1279,] 0.438655970 0.1794501695 2.447048e-02
## [1280,] 0.131453291 0.0066840657 1.132892e-04
## [1281,] 0.440355309 0.2596967205 5.105149e-02
## [1282,] 0.406028666 0.1184250277 1.151354e-02
## [1283,] 0.438655970 0.1794501695 2.447048e-02
## [1284,] 0.340371253 0.0654560102 4.195898e-03
## [1285,] 0.440355309 0.2596967205 5.105149e-02
## [1286,] 0.291090430 0.4308138364 2.125348e-01
## [1287,] 0.424154946 0.3063341278 7.374710e-02
## [1288,] 0.440355309 0.2596967205 5.105149e-02
## [1289,] 0.259696720 0.4403553087 2.488965e-01
## [1290,] 0.291090430 0.4308138364 2.125348e-01
## [1291,] 0.438655970 0.1794501695 2.447048e-02
## [1292,] 0.430813836 0.2910904300 6.556091e-02
## [1293,] 0.318229499 0.0540389715 3.058810e-03
## [1294,] 0.406028666 0.1184250277 1.151354e-02
## [1295,] 0.444093854 0.2114732637 3.356718e-02
## [1296,] 0.340371253 0.0654560102 4.195898e-03
## [1297,] 0.436239133 0.2755194522 5.800410e-02
## [1298,] 0.349346279 0.3975319727 1.507880e-01
## [1299,] 0.291090430 0.4308138364 2.125348e-01
## [1300,] 0.444358195 0.2275981001 3.885821e-02
## [1301,] 0.436239133 0.2755194522 5.800410e-02
## [1302,] 0.204487093 0.0179374643 5.244873e-04
## [1303,] 0.443086838 0.2436977611 4.467792e-02
## [1304,] 0.443086838 0.2436977611 4.467792e-02
## [1305,] 0.349346279 0.3975319727 1.507880e-01
## [1306,] 0.011681380 0.1693800141 8.186701e-01
## [1307,] 0.318229499 0.0540389715 3.058810e-03
## [1308,] 0.266544426 0.0339238361 1.439193e-03
## [1309,] 0.318229499 0.0540389715 3.058810e-03
## [1310,] 0.417093250 0.1331148669 1.416116e-02
## [1311,] 0.349346279 0.3975319727 1.507880e-01
## [1312,] 0.169380014 0.0116813803 2.685375e-04
## [1313,] 0.397531973 0.3493462791 1.023338e-01
## [1314,] 0.426168977 0.1482326877 1.718640e-02
## [1315,] 0.397531973 0.3493462791 1.023338e-01
## [1316,] 0.392899701 0.1042386963 9.218388e-03
## [1317,] 0.397531973 0.3493462791 1.023338e-01
## [1318,] 0.375000000 0.3750000000 1.250000e-01
## [1319,] 0.443086838 0.2436977611 4.467792e-02
## [1320,] 0.349346279 0.3975319727 1.507880e-01
## [1321,] 0.392899701 0.1042386963 9.218388e-03
## [1322,] 0.386693968 0.3625255950 1.132892e-01
## [1323,] 0.275519452 0.4362391326 2.302373e-01
## [1324,] 0.407438488 0.3355375785 9.210835e-02
## [1325,] 0.321175019 0.4163379880 1.798991e-01
## [1326,] 0.406028666 0.1184250277 1.151354e-02
## [1327,] 0.291090430 0.4308138364 2.125348e-01
## [1328,] 0.433331375 0.1637029640 2.061445e-02
## [1329,] 0.417093250 0.1331148669 1.416116e-02
## [1330,] 0.417093250 0.1331148669 1.416116e-02
## [1331,] 0.440355309 0.2596967205 5.105149e-02
## [1332,] 0.436239133 0.2755194522 5.800410e-02
## [1333,] 0.243697761 0.4430868383 2.685375e-01
## [1334,] 0.416337988 0.3211750193 8.258786e-02
## [1335,] 0.397531973 0.3493462791 1.023338e-01
## [1336,] 0.426168977 0.1482326877 1.718640e-02
## [1337,] 0.430813836 0.2910904300 6.556091e-02
## [1338,] 0.243697761 0.4430868383 2.685375e-01
## [1339,] 0.424154946 0.3063341278 7.374710e-02
## [1340,] 0.438655970 0.1794501695 2.447048e-02
## [1341,] 0.397531973 0.3493462791 1.023338e-01
## [1342,] 0.275519452 0.4362391326 2.302373e-01
## [1343,] 0.444093854 0.2114732637 3.356718e-02
## [1344,] 0.424154946 0.3063341278 7.374710e-02
## [1345,] 0.275519452 0.4362391326 2.302373e-01
## [1346,] 0.349346279 0.3975319727 1.507880e-01
## [1347,] 0.440355309 0.2596967205 5.105149e-02
## [1348,] 0.335537578 0.4074384881 1.649156e-01
## [1349,] 0.318229499 0.0540389715 3.058810e-03
## [1350,] 0.335537578 0.4074384881 1.649156e-01
## [1351,] 0.349346279 0.3975319727 1.507880e-01
## [1352,] 0.349346279 0.3975319727 1.507880e-01
## [1353,] 0.340371253 0.0654560102 4.195898e-03
## [1354,] 0.375000000 0.3750000000 1.250000e-01
## [1355,] 0.195398778 0.4422182874 3.336033e-01
## [1356,] 0.204487093 0.0179374643 5.244873e-04
## [1357,] 0.321175019 0.4163379880 1.798991e-01
## [1358,] 0.291090430 0.4308138364 2.125348e-01
## [1359,] 0.386693968 0.3625255950 1.132892e-01
## [1360,] 0.362525595 0.3866939680 1.374912e-01
## [1361,] 0.375000000 0.3750000000 1.250000e-01
## [1362,] 0.375000000 0.3750000000 1.250000e-01
## [1363,] 0.430813836 0.2910904300 6.556091e-02
## [1364,] 0.407438488 0.3355375785 9.210835e-02
## [1365,] 0.386693968 0.3625255950 1.132892e-01
## [1366,] 0.046838810 0.0007678494 4.195898e-06
## [1367,] 0.275519452 0.4362391326 2.302373e-01
## [1368,] 0.424154946 0.3063341278 7.374710e-02
## [1369,] 0.436239133 0.2755194522 5.800410e-02
## [1370,] 0.406028666 0.1184250277 1.151354e-02
## [1371,] 0.406028666 0.1184250277 1.151354e-02
## [1372,] 0.430813836 0.2910904300 6.556091e-02
## [1373,] 0.259696720 0.4403553087 2.488965e-01
## [1374,] 0.104238696 0.3928997013 4.936432e-01
## [1375,] 0.392899701 0.1042386963 9.218388e-03
## [1376,] 0.375000000 0.3750000000 1.250000e-01
## [1377,] 0.440355309 0.2596967205 5.105149e-02
## [1378,] 0.433331375 0.1637029640 2.061445e-02
## [1379,] 0.417093250 0.1331148669 1.416116e-02
## [1380,] 0.321175019 0.4163379880 1.798991e-01
## [1381,] 0.430813836 0.2910904300 6.556091e-02
## [1382,] 0.438655970 0.1794501695 2.447048e-02
## [1383,] 0.444093854 0.2114732637 3.356718e-02
## [1384,] 0.243697761 0.4430868383 2.685375e-01
## [1385,] 0.416337988 0.3211750193 8.258786e-02
## [1386,] 0.426168977 0.1482326877 1.718640e-02
## [1387,] 0.131453291 0.0066840657 1.132892e-04
## [1388,] 0.444358195 0.2275981001 3.885821e-02
## [1389,] 0.340371253 0.0654560102 4.195898e-03
## [1390,] 0.306334128 0.4241549461 1.957638e-01
## [1391,] 0.236850055 0.0253767916 9.063140e-04
## [1392,] 0.392899701 0.1042386963 9.218388e-03
## [1393,] 0.424154946 0.3063341278 7.374710e-02
## [1394,] 0.377630828 0.0906313987 7.250512e-03
## [1395,] 0.440355309 0.2596967205 5.105149e-02
## [1396,] 0.293645732 0.0435030714 2.148300e-03
## [1397,] 0.406028666 0.1184250277 1.151354e-02
## [1398,] 0.436239133 0.2755194522 5.800410e-02
## [1399,] 0.424154946 0.3063341278 7.374710e-02
## [1400,] 0.377630828 0.0906313987 7.250512e-03
## [1401,] 0.243697761 0.4430868383 2.685375e-01
## [1402,] 0.417093250 0.1331148669 1.416116e-02
## [1403,] 0.340371253 0.0654560102 4.195898e-03
## [1404,] 0.430813836 0.2910904300 6.556091e-02
## [1405,] 0.375000000 0.3750000000 1.250000e-01
## [1406,] 0.438655970 0.1794501695 2.447048e-02
## [1407,] 0.397531973 0.3493462791 1.023338e-01
## [1408,] 0.426168977 0.1482326877 1.718640e-02
## [1409,] 0.179450170 0.4386559699 3.574234e-01
## [1410,] 0.424154946 0.3063341278 7.374710e-02
## [1411,] 0.386693968 0.3625255950 1.132892e-01
## [1412,] 0.275519452 0.4362391326 2.302373e-01
## [1413,] 0.362525595 0.3866939680 1.374912e-01
## [1414,] 0.377630828 0.0906313987 7.250512e-03
## [1415,] 0.426168977 0.1482326877 1.718640e-02
## [1416,] 0.349346279 0.3975319727 1.507880e-01
## [1417,] 0.321175019 0.4163379880 1.798991e-01
## [1418,] 0.443086838 0.2436977611 4.467792e-02
## [1419,] 0.426168977 0.1482326877 1.718640e-02
## [1420,] 0.438655970 0.1794501695 2.447048e-02
## [1421,] 0.306334128 0.4241549461 1.957638e-01
## [1422,] 0.179450170 0.4386559699 3.574234e-01
## [1423,] 0.417093250 0.1331148669 1.416116e-02
## [1424,] 0.424154946 0.3063341278 7.374710e-02
## [1425,] 0.000000000 0.0000000000 1.000000e+00
## [1426,] 0.349346279 0.3975319727 1.507880e-01
## [1427,] 0.211473264 0.4440938538 3.108657e-01
## [1428,] 0.417093250 0.1331148669 1.416116e-02
## [1429,] 0.340371253 0.0654560102 4.195898e-03
## [1430,] 0.275519452 0.4362391326 2.302373e-01
## [1431,] 0.275519452 0.4362391326 2.302373e-01
## [1432,] 0.426168977 0.1482326877 1.718640e-02
## [1433,] 0.416337988 0.3211750193 8.258786e-02
## [1434,] 0.275519452 0.4362391326 2.302373e-01
## [1435,] 0.340371253 0.0654560102 4.195898e-03
## [1436,] 0.442218287 0.1953987782 2.877966e-02
## [1437,] 0.275519452 0.4362391326 2.302373e-01
## [1438,] 0.169380014 0.0116813803 2.685375e-04
## [1439,] 0.211473264 0.4440938538 3.108657e-01
## [1440,] 0.377630828 0.0906313987 7.250512e-03
## [1441,] 0.362525595 0.3866939680 1.374912e-01
## [1442,] 0.444093854 0.2114732637 3.356718e-02
## [1443,] 0.291090430 0.4308138364 2.125348e-01
## [1444,] 0.444358195 0.2275981001 3.885821e-02
## [1445,] 0.436239133 0.2755194522 5.800410e-02
## [1446,] 0.054038972 0.3182294988 6.246727e-01
## [1447,] 0.375000000 0.3750000000 1.250000e-01
## [1448,] 0.416337988 0.3211750193 8.258786e-02
## [1449,] 0.440355309 0.2596967205 5.105149e-02
## [1450,] 0.417093250 0.1331148669 1.416116e-02
## [1451,] 0.397531973 0.3493462791 1.023338e-01
## [1452,] 0.204487093 0.0179374643 5.244873e-04
## [1453,] 0.406028666 0.1184250277 1.151354e-02
## [1454,] 0.377630828 0.0906313987 7.250512e-03
## [1455,] 0.306334128 0.4241549461 1.957638e-01
## [1456,] 0.335537578 0.4074384881 1.649156e-01
## [1457,] 0.377630828 0.0906313987 7.250512e-03
## [1458,] 0.406028666 0.1184250277 1.151354e-02
## [1459,] 0.321175019 0.4163379880 1.798991e-01
## [1460,] 0.392899701 0.1042386963 9.218388e-03
## [1461,] 0.362525595 0.3866939680 1.374912e-01
## [1462,] 0.440355309 0.2596967205 5.105149e-02
## [1463,] 0.397531973 0.3493462791 1.023338e-01
## [1464,] 0.442218287 0.1953987782 2.877966e-02
## [1465,] 0.236850055 0.0253767916 9.063140e-04
## [1466,] 0.321175019 0.4163379880 1.798991e-01
## [1467,] 0.444358195 0.2275981001 3.885821e-02
## [1468,] 0.397531973 0.3493462791 1.023338e-01
## [1469,] 0.438655970 0.1794501695 2.447048e-02
## [1470,] 0.211473264 0.4440938538 3.108657e-01
## [1471,] 0.430813836 0.2910904300 6.556091e-02
## [1472,] 0.090631399 0.0030210466 3.356718e-05
## [1473,] 0.318229499 0.0540389715 3.058810e-03
## [1474,] 0.362525595 0.3866939680 1.374912e-01
## [1475,] 0.275519452 0.4362391326 2.302373e-01
## [1476,] 0.046838810 0.0007678494 4.195898e-06
## [1477,] 0.433331375 0.1637029640 2.061445e-02
## [1478,] 0.416337988 0.3211750193 8.258786e-02
## [1479,] 0.306334128 0.4241549461 1.957638e-01
## [1480,] 0.436239133 0.2755194522 5.800410e-02
## [1481,] 0.349346279 0.3975319727 1.507880e-01
## [1482,] 0.386693968 0.3625255950 1.132892e-01
## [1483,] 0.362525595 0.3866939680 1.374912e-01
## [1484,] 0.442218287 0.1953987782 2.877966e-02
## [1485,] 0.444093854 0.2114732637 3.356718e-02
## [1486,] 0.440355309 0.2596967205 5.105149e-02
## [1487,] 0.349346279 0.3975319727 1.507880e-01
## [1488,] 0.349346279 0.3975319727 1.507880e-01
## [1489,] 0.430813836 0.2910904300 6.556091e-02
## [1490,] 0.426168977 0.1482326877 1.718640e-02
## [1491,] 0.430813836 0.2910904300 6.556091e-02
## [1492,] 0.227598100 0.4443581954 2.891855e-01
## [1493,] 0.195398778 0.4422182874 3.336033e-01
## [1494,] 0.375000000 0.3750000000 1.250000e-01
## [1495,] 0.306334128 0.4241549461 1.957638e-01
## [1496,] 0.440355309 0.2596967205 5.105149e-02
## [1497,] 0.360146521 0.0776786613 5.584740e-03
## [1498,] 0.118425028 0.4060286664 4.640328e-01
## [1499,] 0.426168977 0.1482326877 1.718640e-02
## [1500,] 0.440355309 0.2596967205 5.105149e-02
## [1501,] 0.293645732 0.0435030714 2.148300e-03
## [1502,] 0.306334128 0.4241549461 1.957638e-01
## [1503,] 0.424154946 0.3063341278 7.374710e-02
## [1504,] 0.321175019 0.4163379880 1.798991e-01
## [1505,] 0.306334128 0.4241549461 1.957638e-01
## [1506,] 0.179450170 0.4386559699 3.574234e-01
## [1507,] 0.443086838 0.2436977611 4.467792e-02
## [1508,] 0.444358195 0.2275981001 3.885821e-02
## [1509,] 0.291090430 0.4308138364 2.125348e-01
## [1510,] 0.259696720 0.4403553087 2.488965e-01
## [1511,] 0.416337988 0.3211750193 8.258786e-02
## [1512,] 0.340371253 0.0654560102 4.195898e-03
## [1513,] 0.243697761 0.4430868383 2.685375e-01
## [1514,] 0.335537578 0.4074384881 1.649156e-01
## [1515,] 0.392899701 0.1042386963 9.218388e-03
## [1516,] 0.163702964 0.4333313752 3.823512e-01
## [1517,] 0.436239133 0.2755194522 5.800410e-02
## [1518,] 0.377630828 0.0906313987 7.250512e-03
## [1519,] 0.335537578 0.4074384881 1.649156e-01
## [1520,] 0.436239133 0.2755194522 5.800410e-02
## [1521,] 0.259696720 0.4403553087 2.488965e-01
## [1522,] 0.407438488 0.3355375785 9.210835e-02
## [1523,] 0.131453291 0.0066840657 1.132892e-04
## [1524,] 0.426168977 0.1482326877 1.718640e-02
## [1525,] 0.444358195 0.2275981001 3.885821e-02
## [1526,] 0.436239133 0.2755194522 5.800410e-02
## [1527,] 0.000000000 0.0000000000 1.000000e+00
## [1528,] 0.392899701 0.1042386963 9.218388e-03
## [1529,] 0.440355309 0.2596967205 5.105149e-02
## [1530,] 0.442218287 0.1953987782 2.877966e-02
## [1531,] 0.430813836 0.2910904300 6.556091e-02
## [1532,] 0.306334128 0.4241549461 1.957638e-01
## [1533,] 0.416337988 0.3211750193 8.258786e-02
## [1534,] 0.227598100 0.4443581954 2.891855e-01
## [1535,] 0.360146521 0.0776786613 5.584740e-03
## [1536,] 0.360146521 0.0776786613 5.584740e-03
## [1537,] 0.416337988 0.3211750193 8.258786e-02
## [1538,] 0.163702964 0.4333313752 3.823512e-01
## [1539,] 0.275519452 0.4362391326 2.302373e-01
## [1540,] 0.444358195 0.2275981001 3.885821e-02
## [1541,] 0.436239133 0.2755194522 5.800410e-02
## [1542,] 0.397531973 0.3493462791 1.023338e-01
## [1543,] 0.430813836 0.2910904300 6.556091e-02
## [1544,] 0.436239133 0.2755194522 5.800410e-02
## [1545,] 0.362525595 0.3866939680 1.374912e-01
## [1546,] 0.444358195 0.2275981001 3.885821e-02
## [1547,] 0.362525595 0.3866939680 1.374912e-01
## [1548,] 0.211473264 0.4440938538 3.108657e-01
## [1549,] 0.259696720 0.4403553087 2.488965e-01
## [1550,] 0.375000000 0.3750000000 1.250000e-01
## [1551,] 0.417093250 0.1331148669 1.416116e-02
## [1552,] 0.227598100 0.4443581954 2.891855e-01
## [1553,] 0.440355309 0.2596967205 5.105149e-02
## [1554,] 0.417093250 0.1331148669 1.416116e-02
## [1555,] 0.340371253 0.0654560102 4.195898e-03
## [1556,] 0.375000000 0.3750000000 1.250000e-01
## [1557,] 0.349346279 0.3975319727 1.507880e-01
## [1558,] 0.169380014 0.0116813803 2.685375e-04
## [1559,] 0.397531973 0.3493462791 1.023338e-01
## [1560,] 0.227598100 0.4443581954 2.891855e-01
## [1561,] 0.440355309 0.2596967205 5.105149e-02
## [1562,] 0.406028666 0.1184250277 1.151354e-02
## [1563,] 0.444358195 0.2275981001 3.885821e-02
## [1564,] 0.148232688 0.4261689772 4.084119e-01
## [1565,] 0.438655970 0.1794501695 2.447048e-02
## [1566,] 0.195398778 0.4422182874 3.336033e-01
## [1567,] 0.426168977 0.1482326877 1.718640e-02
## [1568,] 0.335537578 0.4074384881 1.649156e-01
## [1569,] 0.417093250 0.1331148669 1.416116e-02
## [1570,] 0.426168977 0.1482326877 1.718640e-02
## [1571,] 0.444358195 0.2275981001 3.885821e-02
## [1572,] 0.227598100 0.4443581954 2.891855e-01
## [1573,] 0.375000000 0.3750000000 1.250000e-01
## [1574,] 0.443086838 0.2436977611 4.467792e-02
## [1575,] 0.375000000 0.3750000000 1.250000e-01
## [1576,] 0.227598100 0.4443581954 2.891855e-01
## [1577,] 0.444358195 0.2275981001 3.885821e-02
## [1578,] 0.163702964 0.4333313752 3.823512e-01
## [1579,] 0.266544426 0.0339238361 1.439193e-03
## [1580,] 0.321175019 0.4163379880 1.798991e-01
## [1581,] 0.204487093 0.0179374643 5.244873e-04
## [1582,] 0.438655970 0.1794501695 2.447048e-02
## [1583,] 0.046838810 0.0007678494 4.195898e-06
## [1584,] 0.430813836 0.2910904300 6.556091e-02
## [1585,] 0.443086838 0.2436977611 4.467792e-02
## [1586,] 0.444093854 0.2114732637 3.356718e-02
## [1587,] 0.163702964 0.4333313752 3.823512e-01
## [1588,] 0.416337988 0.3211750193 8.258786e-02
## [1589,] 0.406028666 0.1184250277 1.151354e-02
## [1590,] 0.442218287 0.1953987782 2.877966e-02
## [1591,] 0.442218287 0.1953987782 2.877966e-02
## [1592,] 0.416337988 0.3211750193 8.258786e-02
## [1593,] 0.424154946 0.3063341278 7.374710e-02
## [1594,] 0.444358195 0.2275981001 3.885821e-02
## [1595,] 0.417093250 0.1331148669 1.416116e-02
## [1596,] 0.433331375 0.1637029640 2.061445e-02
## [1597,] 0.163702964 0.4333313752 3.823512e-01
## [1598,] 0.416337988 0.3211750193 8.258786e-02
## [1599,] 0.440355309 0.2596967205 5.105149e-02
## [1600,] 0.416337988 0.3211750193 8.258786e-02
## [1601,] 0.433331375 0.1637029640 2.061445e-02
## [1602,] 0.335537578 0.4074384881 1.649156e-01
## [1603,] 0.443086838 0.2436977611 4.467792e-02
## [1604,] 0.440355309 0.2596967205 5.105149e-02
## [1605,] 0.386693968 0.3625255950 1.132892e-01
## [1606,] 0.291090430 0.4308138364 2.125348e-01
## [1607,] 0.148232688 0.4261689772 4.084119e-01
## [1608,] 0.360146521 0.0776786613 5.584740e-03
## [1609,] 0.440355309 0.2596967205 5.105149e-02
## [1610,] 0.243697761 0.4430868383 2.685375e-01
## [1611,] 0.426168977 0.1482326877 1.718640e-02
## [1612,] 0.430813836 0.2910904300 6.556091e-02
## [1613,] 0.407438488 0.3355375785 9.210835e-02
## [1614,] 0.397531973 0.3493462791 1.023338e-01
## [1615,] 0.416337988 0.3211750193 8.258786e-02
## [1616,] 0.426168977 0.1482326877 1.718640e-02
## [1617,] 0.406028666 0.1184250277 1.151354e-02
## [1618,] 0.291090430 0.4308138364 2.125348e-01
## [1619,] 0.169380014 0.0116813803 2.685375e-04
## [1620,] 0.426168977 0.1482326877 1.718640e-02
## [1621,] 0.386693968 0.3625255950 1.132892e-01
## [1622,] 0.375000000 0.3750000000 1.250000e-01
## [1623,] 0.397531973 0.3493462791 1.023338e-01
## [1624,] 0.433331375 0.1637029640 2.061445e-02
## [1625,] 0.362525595 0.3866939680 1.374912e-01
## [1626,] 0.291090430 0.4308138364 2.125348e-01
## [1627,] 0.416337988 0.3211750193 8.258786e-02
## [1628,] 0.443086838 0.2436977611 4.467792e-02
## [1629,] 0.397531973 0.3493462791 1.023338e-01
## [1630,] 0.436239133 0.2755194522 5.800410e-02
## [1631,] 0.386693968 0.3625255950 1.132892e-01
## [1632,] 0.375000000 0.3750000000 1.250000e-01
## [1633,] 0.349346279 0.3975319727 1.507880e-01
## [1634,] 0.243697761 0.4430868383 2.685375e-01
## [1635,] 0.406028666 0.1184250277 1.151354e-02
## [1636,] 0.291090430 0.4308138364 2.125348e-01
## [1637,] 0.266544426 0.0339238361 1.439193e-03
## [1638,] 0.033923836 0.2665444262 6.980925e-01
## [1639,] 0.000000000 0.0000000000 0.000000e+00
## [1640,] 0.335537578 0.4074384881 1.649156e-01
## [1641,] 0.349346279 0.3975319727 1.507880e-01
## [1642,] 0.424154946 0.3063341278 7.374710e-02
## [1643,] 0.360146521 0.0776786613 5.584740e-03
## [1644,] 0.386693968 0.3625255950 1.132892e-01
## [1645,] 0.179450170 0.4386559699 3.574234e-01
## [1646,] 0.236850055 0.0253767916 9.063140e-04
## [1647,] 0.386693968 0.3625255950 1.132892e-01
## [1648,] 0.306334128 0.4241549461 1.957638e-01
## [1649,] 0.386693968 0.3625255950 1.132892e-01
## [1650,] 0.033923836 0.2665444262 6.980925e-01
## [1651,] 0.377630828 0.0906313987 7.250512e-03
## [1652,] 0.386693968 0.3625255950 1.132892e-01
## [1653,] 0.360146521 0.0776786613 5.584740e-03
## [1654,] 0.443086838 0.2436977611 4.467792e-02
## [1655,] 0.335537578 0.4074384881 1.649156e-01
## [1656,] 0.407438488 0.3355375785 9.210835e-02
## [1657,] 0.424154946 0.3063341278 7.374710e-02
## [1658,] 0.443086838 0.2436977611 4.467792e-02
## [1659,] 0.392899701 0.1042386963 9.218388e-03
## [1660,] 0.046838810 0.0007678494 4.195898e-06
## [1661,] 0.430813836 0.2910904300 6.556091e-02
## [1662,] 0.275519452 0.4362391326 2.302373e-01
## [1663,] 0.291090430 0.4308138364 2.125348e-01
## [1664,] 0.436239133 0.2755194522 5.800410e-02
## [1665,] 0.318229499 0.0540389715 3.058810e-03
## [1666,] 0.426168977 0.1482326877 1.718640e-02
## [1667,] 0.397531973 0.3493462791 1.023338e-01
## [1668,] 0.417093250 0.1331148669 1.416116e-02
## [1669,] 0.433331375 0.1637029640 2.061445e-02
## [1670,] 0.443086838 0.2436977611 4.467792e-02
## [1671,] 0.397531973 0.3493462791 1.023338e-01
## [1672,] 0.416337988 0.3211750193 8.258786e-02
## [1673,] 0.306334128 0.4241549461 1.957638e-01
## [1674,] 0.440355309 0.2596967205 5.105149e-02
## [1675,] 0.407438488 0.3355375785 9.210835e-02
## [1676,] 0.424154946 0.3063341278 7.374710e-02
## [1677,] 0.424154946 0.3063341278 7.374710e-02
## [1678,] 0.407438488 0.3355375785 9.210835e-02
## [1679,] 0.444093854 0.2114732637 3.356718e-02
## [1680,] 0.417093250 0.1331148669 1.416116e-02
## [1681,] 0.335537578 0.4074384881 1.649156e-01
## [1682,] 0.417093250 0.1331148669 1.416116e-02
## [1683,] 0.406028666 0.1184250277 1.151354e-02
## [1684,] 0.444358195 0.2275981001 3.885821e-02
## [1685,] 0.438655970 0.1794501695 2.447048e-02
## [1686,] 0.442218287 0.1953987782 2.877966e-02
## [1687,] 0.443086838 0.2436977611 4.467792e-02
## [1688,] 0.275519452 0.4362391326 2.302373e-01
## [1689,] 0.375000000 0.3750000000 1.250000e-01
## [1690,] 0.406028666 0.1184250277 1.151354e-02
## [1691,] 0.386693968 0.3625255950 1.132892e-01
## [1692,] 0.386693968 0.3625255950 1.132892e-01
## [1693,] 0.406028666 0.1184250277 1.151354e-02
## [1694,] 0.377630828 0.0906313987 7.250512e-03
## [1695,] 0.417093250 0.1331148669 1.416116e-02
## [1696,] 0.275519452 0.4362391326 2.302373e-01
## [1697,] 0.407438488 0.3355375785 9.210835e-02
## [1698,] 0.375000000 0.3750000000 1.250000e-01
## [1699,] 0.442218287 0.1953987782 2.877966e-02
## [1700,] 0.321175019 0.4163379880 1.798991e-01
## [1701,] 0.275519452 0.4362391326 2.302373e-01
## [1702,] 0.275519452 0.4362391326 2.302373e-01
## [1703,] 0.386693968 0.3625255950 1.132892e-01
## [1704,] 0.397531973 0.3493462791 1.023338e-01
## [1705,] 0.335537578 0.4074384881 1.649156e-01
## [1706,] 0.443086838 0.2436977611 4.467792e-02
## [1707,] 0.433331375 0.1637029640 2.061445e-02
## [1708,] 0.443086838 0.2436977611 4.467792e-02
## [1709,] 0.169380014 0.0116813803 2.685375e-04
## [1710,] 0.386693968 0.3625255950 1.132892e-01
## [1711,] 0.443086838 0.2436977611 4.467792e-02
## [1712,] 0.416337988 0.3211750193 8.258786e-02
## [1713,] 0.377630828 0.0906313987 7.250512e-03
## [1714,] 0.407438488 0.3355375785 9.210835e-02
## [1715,] 0.406028666 0.1184250277 1.151354e-02
## [1716,] 0.321175019 0.4163379880 1.798991e-01
## [1717,] 0.406028666 0.1184250277 1.151354e-02
## [1718,] 0.444358195 0.2275981001 3.885821e-02
## [1719,] 0.349346279 0.3975319727 1.507880e-01
## [1720,] 0.443086838 0.2436977611 4.467792e-02
## [1721,] 0.118425028 0.4060286664 4.640328e-01
## [1722,] 0.443086838 0.2436977611 4.467792e-02
## [1723,] 0.335537578 0.4074384881 1.649156e-01
## [1724,] 0.406028666 0.1184250277 1.151354e-02
## [1725,] 0.416337988 0.3211750193 8.258786e-02
## [1726,] 0.442218287 0.1953987782 2.877966e-02
## [1727,] 0.375000000 0.3750000000 1.250000e-01
## [1728,] 0.321175019 0.4163379880 1.798991e-01
## [1729,] 0.118425028 0.4060286664 4.640328e-01
## [1730,] 0.440355309 0.2596967205 5.105149e-02
## [1731,] 0.306334128 0.4241549461 1.957638e-01
## [1732,] 0.236850055 0.0253767916 9.063140e-04
## [1733,] 0.179450170 0.4386559699 3.574234e-01
## [1734,] 0.163702964 0.4333313752 3.823512e-01
## [1735,] 0.293645732 0.0435030714 2.148300e-03
## [1736,] 0.416337988 0.3211750193 8.258786e-02
## [1737,] 0.204487093 0.0179374643 5.244873e-04
## [1738,] 0.392899701 0.1042386963 9.218388e-03
## [1739,] 0.430813836 0.2910904300 6.556091e-02
## [1740,] 0.386693968 0.3625255950 1.132892e-01
## [1741,] 0.291090430 0.4308138364 2.125348e-01
## [1742,] 0.386693968 0.3625255950 1.132892e-01
## [1743,] 0.163702964 0.4333313752 3.823512e-01
## [1744,] 0.259696720 0.4403553087 2.488965e-01
## [1745,] 0.077678661 0.3601465208 5.565901e-01
## [1746,] 0.392899701 0.1042386963 9.218388e-03
## [1747,] 0.444093854 0.2114732637 3.356718e-02
## [1748,] 0.424154946 0.3063341278 7.374710e-02
## [1749,] 0.392899701 0.1042386963 9.218388e-03
## [1750,] 0.375000000 0.3750000000 1.250000e-01
## [1751,] 0.293645732 0.0435030714 2.148300e-03
## [1752,] 0.377630828 0.0906313987 7.250512e-03
## [1753,] 0.443086838 0.2436977611 4.467792e-02
## [1754,] 0.424154946 0.3063341278 7.374710e-02
## [1755,] 0.133114867 0.4170932496 4.356307e-01
## [1756,] 0.306334128 0.4241549461 1.957638e-01
## [1757,] 0.275519452 0.4362391326 2.302373e-01
## [1758,] 0.442218287 0.1953987782 2.877966e-02
## [1759,] 0.407438488 0.3355375785 9.210835e-02
## [1760,] 0.442218287 0.1953987782 2.877966e-02
## [1761,] 0.243697761 0.4430868383 2.685375e-01
## [1762,] 0.349346279 0.3975319727 1.507880e-01
## [1763,] 0.436239133 0.2755194522 5.800410e-02
## [1764,] 0.407438488 0.3355375785 9.210835e-02
## [1765,] 0.430813836 0.2910904300 6.556091e-02
## [1766,] 0.397531973 0.3493462791 1.023338e-01
## [1767,] 0.424154946 0.3063341278 7.374710e-02
## [1768,] 0.438655970 0.1794501695 2.447048e-02
## [1769,] 0.360146521 0.0776786613 5.584740e-03
## [1770,] 0.090631399 0.0030210466 3.356718e-05
## [1771,] 0.406028666 0.1184250277 1.151354e-02
## [1772,] 0.438655970 0.1794501695 2.447048e-02
## [1773,] 0.392899701 0.1042386963 9.218388e-03
## [1774,] 0.340371253 0.0654560102 4.195898e-03
## [1775,] 0.436239133 0.2755194522 5.800410e-02
## [1776,] 0.148232688 0.4261689772 4.084119e-01
## [1777,] 0.442218287 0.1953987782 2.877966e-02
## [1778,] 0.377630828 0.0906313987 7.250512e-03
## [1779,] 0.293645732 0.0435030714 2.148300e-03
## [1780,] 0.424154946 0.3063341278 7.374710e-02
## [1781,] 0.386693968 0.3625255950 1.132892e-01
## [1782,] 0.321175019 0.4163379880 1.798991e-01
## [1783,] 0.436239133 0.2755194522 5.800410e-02
## [1784,] 0.266544426 0.0339238361 1.439193e-03
## [1785,] 0.335537578 0.4074384881 1.649156e-01
## [1786,] 0.444093854 0.2114732637 3.356718e-02
## [1787,] 0.360146521 0.0776786613 5.584740e-03
## [1788,] 0.259696720 0.4403553087 2.488965e-01
## [1789,] 0.362525595 0.3866939680 1.374912e-01
## [1790,] 0.204487093 0.0179374643 5.244873e-04
## [1791,] 0.195398778 0.4422182874 3.336033e-01
## [1792,] 0.065456010 0.3403712531 5.899768e-01
## [1793,] 0.227598100 0.4443581954 2.891855e-01
## [1794,] 0.266544426 0.0339238361 1.439193e-03
## [1795,] 0.386693968 0.3625255950 1.132892e-01
## [1796,] 0.335537578 0.4074384881 1.649156e-01
## [1797,] 0.424154946 0.3063341278 7.374710e-02
## [1798,] 0.430813836 0.2910904300 6.556091e-02
## [1799,] 0.349346279 0.3975319727 1.507880e-01
## [1800,] 0.430813836 0.2910904300 6.556091e-02
## [1801,] 0.340371253 0.0654560102 4.195898e-03
## [1802,] 0.306334128 0.4241549461 1.957638e-01
## [1803,] 0.438655970 0.1794501695 2.447048e-02
## [1804,] 0.054038972 0.3182294988 6.246727e-01
## [1805,] 0.204487093 0.0179374643 5.244873e-04
## [1806,] 0.436239133 0.2755194522 5.800410e-02
## [1807,] 0.318229499 0.0540389715 3.058810e-03
## [1808,] 0.360146521 0.0776786613 5.584740e-03
## [1809,] 0.440355309 0.2596967205 5.105149e-02
## [1810,] 0.169380014 0.0116813803 2.685375e-04
## [1811,] 0.444358195 0.2275981001 3.885821e-02
## [1812,] 0.375000000 0.3750000000 1.250000e-01
## [1813,] 0.436239133 0.2755194522 5.800410e-02
## [1814,] 0.291090430 0.4308138364 2.125348e-01
## [1815,] 0.397531973 0.3493462791 1.023338e-01
## [1816,] 0.377630828 0.0906313987 7.250512e-03
## [1817,] 0.275519452 0.4362391326 2.302373e-01
## [1818,] 0.430813836 0.2910904300 6.556091e-02
## [1819,] 0.433331375 0.1637029640 2.061445e-02
## [1820,] 0.243697761 0.4430868383 2.685375e-01
## [1821,] 0.077678661 0.3601465208 5.565901e-01
## [1822,] 0.090631399 0.3776308281 5.244873e-01
## [1823,] 0.335537578 0.4074384881 1.649156e-01
## [1824,] 0.118425028 0.4060286664 4.640328e-01
## [1825,] 0.377630828 0.0906313987 7.250512e-03
## [1826,] 0.430813836 0.2910904300 6.556091e-02
## [1827,] 0.306334128 0.4241549461 1.957638e-01
## [1828,] 0.442218287 0.1953987782 2.877966e-02
## [1829,] 0.407438488 0.3355375785 9.210835e-02
## [1830,] 0.321175019 0.4163379880 1.798991e-01
## [1831,] 0.392899701 0.1042386963 9.218388e-03
## [1832,] 0.000000000 0.0000000000 0.000000e+00
## [1833,] 0.375000000 0.3750000000 1.250000e-01
## [1834,] 0.443086838 0.2436977611 4.467792e-02
## [1835,] 0.433331375 0.1637029640 2.061445e-02
## [1836,] 0.407438488 0.3355375785 9.210835e-02
## [1837,] 0.443086838 0.2436977611 4.467792e-02
## [1838,] 0.444358195 0.2275981001 3.885821e-02
## [1839,] 0.436239133 0.2755194522 5.800410e-02
## [1840,] 0.442218287 0.1953987782 2.877966e-02
## [1841,] 0.243697761 0.4430868383 2.685375e-01
## [1842,] 0.443086838 0.2436977611 4.467792e-02
## [1843,] 0.318229499 0.0540389715 3.058810e-03
## [1844,] 0.392899701 0.1042386963 9.218388e-03
## [1845,] 0.424154946 0.3063341278 7.374710e-02
## [1846,] 0.444093854 0.2114732637 3.356718e-02
## [1847,] 0.426168977 0.1482326877 1.718640e-02
## [1848,] 0.440355309 0.2596967205 5.105149e-02
## [1849,] 0.090631399 0.0030210466 3.356718e-05
## [1850,] 0.444093854 0.2114732637 3.356718e-02
## [1851,] 0.430813836 0.2910904300 6.556091e-02
## [1852,] 0.362525595 0.3866939680 1.374912e-01
## [1853,] 0.291090430 0.4308138364 2.125348e-01
## [1854,] 0.236850055 0.0253767916 9.063140e-04
## [1855,] 0.440355309 0.2596967205 5.105149e-02
## [1856,] 0.442218287 0.1953987782 2.877966e-02
## [1857,] 0.436239133 0.2755194522 5.800410e-02
## [1858,] 0.266544426 0.0339238361 1.439193e-03
## [1859,] 0.416337988 0.3211750193 8.258786e-02
## [1860,] 0.443086838 0.2436977611 4.467792e-02
## [1861,] 0.430813836 0.2910904300 6.556091e-02
## [1862,] 0.362525595 0.3866939680 1.374912e-01
## [1863,] 0.436239133 0.2755194522 5.800410e-02
## [1864,] 0.046838810 0.0007678494 4.195898e-06
## [1865,] 0.424154946 0.3063341278 7.374710e-02
## [1866,] 0.293645732 0.0435030714 2.148300e-03
## [1867,] 0.306334128 0.4241549461 1.957638e-01
## [1868,] 0.406028666 0.1184250277 1.151354e-02
## [1869,] 0.375000000 0.3750000000 1.250000e-01
## [1870,] 0.433331375 0.1637029640 2.061445e-02
## [1871,] 0.426168977 0.1482326877 1.718640e-02
## [1872,] 0.204487093 0.0179374643 5.244873e-04
## [1873,] 0.211473264 0.4440938538 3.108657e-01
## [1874,] 0.397531973 0.3493462791 1.023338e-01
## [1875,] 0.386693968 0.3625255950 1.132892e-01
## [1876,] 0.433331375 0.1637029640 2.061445e-02
## [1877,] 0.291090430 0.4308138364 2.125348e-01
## [1878,] 0.433331375 0.1637029640 2.061445e-02
## [1879,] 0.442218287 0.1953987782 2.877966e-02
## [1880,] 0.318229499 0.0540389715 3.058810e-03
## [1881,] 0.148232688 0.4261689772 4.084119e-01
## [1882,] 0.293645732 0.0435030714 2.148300e-03
## [1883,] 0.440355309 0.2596967205 5.105149e-02
## [1884,] 0.169380014 0.0116813803 2.685375e-04
## [1885,] 0.407438488 0.3355375785 9.210835e-02
## [1886,] 0.204487093 0.0179374643 5.244873e-04
## [1887,] 0.424154946 0.3063341278 7.374710e-02
## [1888,] 0.090631399 0.0030210466 3.356718e-05
## [1889,] 0.430813836 0.2910904300 6.556091e-02
## [1890,] 0.407438488 0.3355375785 9.210835e-02
## [1891,] 0.417093250 0.1331148669 1.416116e-02
## [1892,] 0.179450170 0.4386559699 3.574234e-01
## [1893,] 0.444093854 0.2114732637 3.356718e-02
## [1894,] 0.407438488 0.3355375785 9.210835e-02
## [1895,] 0.163702964 0.4333313752 3.823512e-01
## [1896,] 0.243697761 0.4430868383 2.685375e-01
## [1897,] 0.204487093 0.0179374643 5.244873e-04
## [1898,] 0.362525595 0.3866939680 1.374912e-01
## [1899,] 0.433331375 0.1637029640 2.061445e-02
## [1900,] 0.444093854 0.2114732637 3.356718e-02
## [1901,] 0.438655970 0.1794501695 2.447048e-02
## [1902,] 0.406028666 0.1184250277 1.151354e-02
## [1903,] 0.440355309 0.2596967205 5.105149e-02
## [1904,] 0.293645732 0.0435030714 2.148300e-03
## [1905,] 0.293645732 0.0435030714 2.148300e-03
## [1906,] 0.266544426 0.0339238361 1.439193e-03
## [1907,] 0.243697761 0.4430868383 2.685375e-01
## [1908,] 0.259696720 0.4403553087 2.488965e-01
## [1909,] 0.377630828 0.0906313987 7.250512e-03
## [1910,] 0.424154946 0.3063341278 7.374710e-02
## [1911,] 0.360146521 0.0776786613 5.584740e-03
## [1912,] 0.349346279 0.3975319727 1.507880e-01
## [1913,] 0.442218287 0.1953987782 2.877966e-02
## [1914,] 0.104238696 0.3928997013 4.936432e-01
## [1915,] 0.426168977 0.1482326877 1.718640e-02
## [1916,] 0.362525595 0.3866939680 1.374912e-01
## [1917,] 0.444093854 0.2114732637 3.356718e-02
## [1918,] 0.291090430 0.4308138364 2.125348e-01
## [1919,] 0.444358195 0.2275981001 3.885821e-02
## [1920,] 0.306334128 0.4241549461 1.957638e-01
## [1921,] 0.375000000 0.3750000000 1.250000e-01
## [1922,] 0.444358195 0.2275981001 3.885821e-02
## [1923,] 0.406028666 0.1184250277 1.151354e-02
## [1924,] 0.397531973 0.3493462791 1.023338e-01
## [1925,] 0.443086838 0.2436977611 4.467792e-02
## [1926,] 0.349346279 0.3975319727 1.507880e-01
## [1927,] 0.340371253 0.0654560102 4.195898e-03
## [1928,] 0.291090430 0.4308138364 2.125348e-01
## [1929,] 0.424154946 0.3063341278 7.374710e-02
## [1930,] 0.377630828 0.0906313987 7.250512e-03
## [1931,] 0.443086838 0.2436977611 4.467792e-02
## [1932,] 0.375000000 0.3750000000 1.250000e-01
## [1933,] 0.430813836 0.2910904300 6.556091e-02
## [1934,] 0.424154946 0.3063341278 7.374710e-02
## [1935,] 0.406028666 0.1184250277 1.151354e-02
## [1936,] 0.426168977 0.1482326877 1.718640e-02
## [1937,] 0.438655970 0.1794501695 2.447048e-02
## [1938,] 0.349346279 0.3975319727 1.507880e-01
## [1939,] 0.211473264 0.4440938538 3.108657e-01
## [1940,] 0.438655970 0.1794501695 2.447048e-02
## [1941,] 0.440355309 0.2596967205 5.105149e-02
## [1942,] 0.275519452 0.4362391326 2.302373e-01
## [1943,] 0.424154946 0.3063341278 7.374710e-02
## [1944,] 0.416337988 0.3211750193 8.258786e-02
## [1945,] 0.266544426 0.0339238361 1.439193e-03
## [1946,] 0.335537578 0.4074384881 1.649156e-01
## [1947,] 0.377630828 0.0906313987 7.250512e-03
## [1948,] 0.360146521 0.0776786613 5.584740e-03
## [1949,] 0.204487093 0.0179374643 5.244873e-04
## [1950,] 0.386693968 0.3625255950 1.132892e-01
## [1951,] 0.424154946 0.3063341278 7.374710e-02
## [1952,] 0.349346279 0.3975319727 1.507880e-01
## [1953,] 0.438655970 0.1794501695 2.447048e-02
## [1954,] 0.204487093 0.0179374643 5.244873e-04
## [1955,] 0.349346279 0.3975319727 1.507880e-01
## [1956,] 0.397531973 0.3493462791 1.023338e-01
## [1957,] 0.426168977 0.1482326877 1.718640e-02
## [1958,] 0.426168977 0.1482326877 1.718640e-02
## [1959,] 0.430813836 0.2910904300 6.556091e-02
## [1960,] 0.430813836 0.2910904300 6.556091e-02
## [1961,] 0.227598100 0.4443581954 2.891855e-01
## [1962,] 0.321175019 0.4163379880 1.798991e-01
## [1963,] 0.090631399 0.0030210466 3.356718e-05
## [1964,] 0.443086838 0.2436977611 4.467792e-02
## [1965,] 0.386693968 0.3625255950 1.132892e-01
## [1966,] 0.430813836 0.2910904300 6.556091e-02
## [1967,] 0.275519452 0.4362391326 2.302373e-01
## [1968,] 0.291090430 0.4308138364 2.125348e-01
## [1969,] 0.444093854 0.2114732637 3.356718e-02
## [1970,] 0.335537578 0.4074384881 1.649156e-01
## [1971,] 0.443086838 0.2436977611 4.467792e-02
## [1972,] 0.360146521 0.0776786613 5.584740e-03
## [1973,] 0.444358195 0.2275981001 3.885821e-02
## [1974,] 0.362525595 0.3866939680 1.374912e-01
## [1975,] 0.362525595 0.3866939680 1.374912e-01
## [1976,] 0.259696720 0.4403553087 2.488965e-01
## [1977,] 0.377630828 0.0906313987 7.250512e-03
## [1978,] 0.275519452 0.4362391326 2.302373e-01
## [1979,] 0.104238696 0.3928997013 4.936432e-01
## [1980,] 0.349346279 0.3975319727 1.507880e-01
## [1981,] 0.416337988 0.3211750193 8.258786e-02
## [1982,] 0.306334128 0.4241549461 1.957638e-01
## [1983,] 0.204487093 0.0179374643 5.244873e-04
## [1984,] 0.025376792 0.2368500554 7.368668e-01
## [1985,] 0.442218287 0.1953987782 2.877966e-02
## [1986,] 0.291090430 0.4308138364 2.125348e-01
## [1987,] 0.266544426 0.0339238361 1.439193e-03
## [1988,] 0.118425028 0.4060286664 4.640328e-01
## [1989,] 0.163702964 0.4333313752 3.823512e-01
## [1990,] 0.424154946 0.3063341278 7.374710e-02
## [1991,] 0.406028666 0.1184250277 1.151354e-02
## [1992,] 0.430813836 0.2910904300 6.556091e-02
## [1993,] 0.442218287 0.1953987782 2.877966e-02
## [1994,] 0.293645732 0.0435030714 2.148300e-03
## [1995,] 0.444358195 0.2275981001 3.885821e-02
## [1996,] 0.416337988 0.3211750193 8.258786e-02
## [1997,] 0.443086838 0.2436977611 4.467792e-02
## [1998,] 0.349346279 0.3975319727 1.507880e-01
## [1999,] 0.430813836 0.2910904300 6.556091e-02
## [2000,] 0.335537578 0.4074384881 1.649156e-01
## [2001,] 0.362525595 0.3866939680 1.374912e-01
## [2002,] 0.306334128 0.4241549461 1.957638e-01
## [2003,] 0.340371253 0.0654560102 4.195898e-03
## [2004,] 0.340371253 0.0654560102 4.195898e-03
## [2005,] 0.293645732 0.0435030714 2.148300e-03
## [2006,] 0.416337988 0.3211750193 8.258786e-02
## [2007,] 0.033923836 0.2665444262 6.980925e-01
## [2008,] 0.392899701 0.1042386963 9.218388e-03
## [2009,] 0.443086838 0.2436977611 4.467792e-02
## [2010,] 0.444093854 0.2114732637 3.356718e-02
## [2011,] 0.436239133 0.2755194522 5.800410e-02
## [2012,] 0.362525595 0.3866939680 1.374912e-01
## [2013,] 0.349346279 0.3975319727 1.507880e-01
## [2014,] 0.443086838 0.2436977611 4.467792e-02
## [2015,] 0.266544426 0.0339238361 1.439193e-03
## [2016,] 0.397531973 0.3493462791 1.023338e-01
## [2017,] 0.104238696 0.3928997013 4.936432e-01
## [2018,] 0.424154946 0.3063341278 7.374710e-02
## [2019,] 0.417093250 0.1331148669 1.416116e-02
## [2020,] 0.360146521 0.0776786613 5.584740e-03
## [2021,] 0.318229499 0.0540389715 3.058810e-03
## [2022,] 0.443086838 0.2436977611 4.467792e-02
## [2023,] 0.438655970 0.1794501695 2.447048e-02
## [2024,] 0.386693968 0.3625255950 1.132892e-01
## [2025,] 0.321175019 0.4163379880 1.798991e-01
## [2026,] 0.444093854 0.2114732637 3.356718e-02
## [2027,] 0.065456010 0.3403712531 5.899768e-01
## [2028,] 0.236850055 0.0253767916 9.063140e-04
## [2029,] 0.169380014 0.0116813803 2.685375e-04
## [2030,] 0.360146521 0.0776786613 5.584740e-03
## [2031,] 0.444093854 0.2114732637 3.356718e-02
## [2032,] 0.054038972 0.3182294988 6.246727e-01
## [2033,] 0.406028666 0.1184250277 1.151354e-02
## [2034,] 0.406028666 0.1184250277 1.151354e-02
## [2035,] 0.417093250 0.1331148669 1.416116e-02
## [2036,] 0.438655970 0.1794501695 2.447048e-02
## [2037,] 0.407438488 0.3355375785 9.210835e-02
## [2038,] 0.227598100 0.4443581954 2.891855e-01
## [2039,] 0.377630828 0.0906313987 7.250512e-03
## [2040,] 0.306334128 0.4241549461 1.957638e-01
## [2041,] 0.392899701 0.1042386963 9.218388e-03
## [2042,] 0.426168977 0.1482326877 1.718640e-02
## [2043,] 0.397531973 0.3493462791 1.023338e-01
## [2044,] 0.360146521 0.0776786613 5.584740e-03
## [2045,] 0.243697761 0.4430868383 2.685375e-01
## [2046,] 0.440355309 0.2596967205 5.105149e-02
## [2047,] 0.275519452 0.4362391326 2.302373e-01
## [2048,] 0.335537578 0.4074384881 1.649156e-01
## [2049,] 0.321175019 0.4163379880 1.798991e-01
## [2050,] 0.442218287 0.1953987782 2.877966e-02
## [2051,] 0.433331375 0.1637029640 2.061445e-02
## [2052,] 0.443086838 0.2436977611 4.467792e-02
## [2053,] 0.306334128 0.4241549461 1.957638e-01
## [2054,] 0.442218287 0.1953987782 2.877966e-02
## [2055,] 0.444358195 0.2275981001 3.885821e-02
## [2056,] 0.397531973 0.3493462791 1.023338e-01
## [2057,] 0.349346279 0.3975319727 1.507880e-01
## [2058,] 0.397531973 0.3493462791 1.023338e-01
## [2059,] 0.340371253 0.0654560102 4.195898e-03
## [2060,] 0.133114867 0.4170932496 4.356307e-01
## [2061,] 0.436239133 0.2755194522 5.800410e-02
## [2062,] 0.243697761 0.4430868383 2.685375e-01
## [2063,] 0.375000000 0.3750000000 1.250000e-01
## [2064,] 0.424154946 0.3063341278 7.374710e-02
## [2065,] 0.386693968 0.3625255950 1.132892e-01
## [2066,] 0.436239133 0.2755194522 5.800410e-02
## [2067,] 0.377630828 0.0906313987 7.250512e-03
## [2068,] 0.392899701 0.1042386963 9.218388e-03
## [2069,] 0.360146521 0.0776786613 5.584740e-03
## [2070,] 0.442218287 0.1953987782 2.877966e-02
## [2071,] 0.275519452 0.4362391326 2.302373e-01
## [2072,] 0.424154946 0.3063341278 7.374710e-02
## [2073,] 0.266544426 0.0339238361 1.439193e-03
## [2074,] 0.392899701 0.1042386963 9.218388e-03
## [2075,] 0.349346279 0.3975319727 1.507880e-01
## [2076,] 0.266544426 0.0339238361 1.439193e-03
## [2077,] 0.362525595 0.3866939680 1.374912e-01
## [2078,] 0.377630828 0.0906313987 7.250512e-03
## [2079,] 0.443086838 0.2436977611 4.467792e-02
## [2080,] 0.426168977 0.1482326877 1.718640e-02
## [2081,] 0.436239133 0.2755194522 5.800410e-02
## [2082,] 0.377630828 0.0906313987 7.250512e-03
## [2083,] 0.293645732 0.0435030714 2.148300e-03
## [2084,] 0.360146521 0.0776786613 5.584740e-03
## [2085,] 0.306334128 0.4241549461 1.957638e-01
## [2086,] 0.349346279 0.3975319727 1.507880e-01
## [2087,] 0.375000000 0.3750000000 1.250000e-01
## [2088,] 0.321175019 0.4163379880 1.798991e-01
## [2089,] 0.443086838 0.2436977611 4.467792e-02
## [2090,] 0.335537578 0.4074384881 1.649156e-01
## [2091,] 0.275519452 0.4362391326 2.302373e-01
## [2092,] 0.377630828 0.0906313987 7.250512e-03
## [2093,] 0.349346279 0.3975319727 1.507880e-01
## [2094,] 0.406028666 0.1184250277 1.151354e-02
## [2095,] 0.362525595 0.3866939680 1.374912e-01
## [2096,] 0.293645732 0.0435030714 2.148300e-03
## [2097,] 0.392899701 0.1042386963 9.218388e-03
## [2098,] 0.392899701 0.1042386963 9.218388e-03
## [2099,] 0.424154946 0.3063341278 7.374710e-02
## [2100,] 0.377630828 0.0906313987 7.250512e-03
## [2101,] 0.318229499 0.0540389715 3.058810e-03
## [2102,] 0.291090430 0.4308138364 2.125348e-01
## attr(,"degree")
## [1] 3
## attr(,"knots")
## numeric(0)
## attr(,"Boundary.knots")
## [1] 18 80
## attr(,"intercept")
## [1] FALSE
## attr(,"class")
## [1] "bs"     "basis"  "matrix"

See also: ns() (natural cubic spline basis), poly() (orthogonal polynomial terms)


Fitting curves with splines

# Regress wage on the columns of the spline basis matrix built from age.
lm1 <- lm(formula = wage ~ bsBasis, data = training)
# Scatter plot of the raw training observations (small solid dots).
plot(x = training$age, y = training$wage, cex = 0.5, pch = 19)
# Overlay the model's predictions for the same rows in red.
points(
  x = training$age,
  y = predict(lm1, newdata = training),
  cex = 0.5,
  pch = 19,
  col = "red"
)


Splines on the test set

# Evaluate the spline basis fitted on the training ages at the test-set ages.
# The predict() method for a bs basis takes the new values through `newx`;
# an argument named `age` is silently absorbed by `...`, which leaves `newx`
# missing and returns the training basis unchanged.
# NOTE(review): the matrix printed below appears to come from the earlier
# `age=` call (row labels are padded for four-digit indices, like the
# training basis) -- re-run this chunk to refresh the output.
predict(bsBasis, newx = testing$age)
##                   1            2            3
##    [1,] 0.236850055 0.0253767916 9.063140e-04
##    [2,] 0.416337988 0.3211750193 8.258786e-02
##    [3,] 0.430813836 0.2910904300 6.556091e-02
##    [4,] 0.362525595 0.3866939680 1.374912e-01
##    [5,] 0.306334128 0.4241549461 1.957638e-01
##    [6,] 0.424154946 0.3063341278 7.374710e-02
##    [7,] 0.377630828 0.0906313987 7.250512e-03
##    [8,] 0.444358195 0.2275981001 3.885821e-02
##    [9,] 0.442218287 0.1953987782 2.877966e-02
##   [10,] 0.362525595 0.3866939680 1.374912e-01
##   [11,] 0.275519452 0.4362391326 2.302373e-01
##   [12,] 0.444093854 0.2114732637 3.356718e-02
##   [13,] 0.443086838 0.2436977611 4.467792e-02
##   [14,] 0.375000000 0.3750000000 1.250000e-01
##   [15,] 0.430813836 0.2910904300 6.556091e-02
##   [16,] 0.426168977 0.1482326877 1.718640e-02
##   [17,] 0.000000000 0.0000000000 0.000000e+00
##   [18,] 0.291090430 0.4308138364 2.125348e-01
##   [19,] 0.349346279 0.3975319727 1.507880e-01
##   [20,] 0.417093250 0.1331148669 1.416116e-02
##   [21,] 0.426168977 0.1482326877 1.718640e-02
##   [22,] 0.438655970 0.1794501695 2.447048e-02
##   [23,] 0.275519452 0.4362391326 2.302373e-01
##   [24,] 0.266544426 0.0339238361 1.439193e-03
##   [25,] 0.406028666 0.1184250277 1.151354e-02
##   [26,] 0.318229499 0.0540389715 3.058810e-03
##   [27,] 0.340371253 0.0654560102 4.195898e-03
##   [28,] 0.318229499 0.0540389715 3.058810e-03
##   [29,] 0.430813836 0.2910904300 6.556091e-02
##   [30,] 0.362525595 0.3866939680 1.374912e-01
##   [31,] 0.444358195 0.2275981001 3.885821e-02
##   [32,] 0.259696720 0.4403553087 2.488965e-01
##   [33,] 0.266544426 0.0339238361 1.439193e-03
##   [34,] 0.430813836 0.2910904300 6.556091e-02
##   [35,] 0.204487093 0.0179374643 5.244873e-04
##   [36,] 0.377630828 0.0906313987 7.250512e-03
##   [37,] 0.195398778 0.4422182874 3.336033e-01
##   [38,] 0.426168977 0.1482326877 1.718640e-02
##   [39,] 0.077678661 0.3601465208 5.565901e-01
##   [40,] 0.386693968 0.3625255950 1.132892e-01
##   [41,] 0.375000000 0.3750000000 1.250000e-01
##   [42,] 0.436239133 0.2755194522 5.800410e-02
##   [43,] 0.442218287 0.1953987782 2.877966e-02
##   [44,] 0.131453291 0.0066840657 1.132892e-04
##   [45,] 0.243697761 0.4430868383 2.685375e-01
##   [46,] 0.266544426 0.0339238361 1.439193e-03
##   [47,] 0.443086838 0.2436977611 4.467792e-02
##   [48,] 0.424154946 0.3063341278 7.374710e-02
##   [49,] 0.424154946 0.3063341278 7.374710e-02
##   [50,] 0.195398778 0.4422182874 3.336033e-01
##   [51,] 0.291090430 0.4308138364 2.125348e-01
##   [52,] 0.436239133 0.2755194522 5.800410e-02
##   [53,] 0.266544426 0.0339238361 1.439193e-03
##   [54,] 0.321175019 0.4163379880 1.798991e-01
##   [55,] 0.397531973 0.3493462791 1.023338e-01
##   [56,] 0.407438488 0.3355375785 9.210835e-02
##   [57,] 0.426168977 0.1482326877 1.718640e-02
##   [58,] 0.169380014 0.0116813803 2.685375e-04
##   [59,] 0.416337988 0.3211750193 8.258786e-02
##   [60,] 0.179450170 0.4386559699 3.574234e-01
##   [61,] 0.306334128 0.4241549461 1.957638e-01
##   [62,] 0.426168977 0.1482326877 1.718640e-02
##   [63,] 0.362525595 0.3866939680 1.374912e-01
##   [64,] 0.407438488 0.3355375785 9.210835e-02
##   [65,] 0.440355309 0.2596967205 5.105149e-02
##   [66,] 0.444093854 0.2114732637 3.356718e-02
##   [67,] 0.433331375 0.1637029640 2.061445e-02
##   [68,] 0.118425028 0.4060286664 4.640328e-01
##   [69,] 0.442218287 0.1953987782 2.877966e-02
##   [70,] 0.444358195 0.2275981001 3.885821e-02
##   [71,] 0.436239133 0.2755194522 5.800410e-02
##   [72,] 0.349346279 0.3975319727 1.507880e-01
##   [73,] 0.444093854 0.2114732637 3.356718e-02
##   [74,] 0.375000000 0.3750000000 1.250000e-01
##   [75,] 0.436239133 0.2755194522 5.800410e-02
##   [76,] 0.430813836 0.2910904300 6.556091e-02
##   [77,] 0.227598100 0.4443581954 2.891855e-01
##   [78,] 0.259696720 0.4403553087 2.488965e-01
##   [79,] 0.266544426 0.0339238361 1.439193e-03
##   [80,] 0.375000000 0.3750000000 1.250000e-01
##   [81,] 0.444093854 0.2114732637 3.356718e-02
##   [82,] 0.195398778 0.4422182874 3.336033e-01
##   [83,] 0.335537578 0.4074384881 1.649156e-01
##   [84,] 0.211473264 0.4440938538 3.108657e-01
##   [85,] 0.407438488 0.3355375785 9.210835e-02
##   [86,] 0.131453291 0.0066840657 1.132892e-04
##   [87,] 0.195398778 0.4422182874 3.336033e-01
##   [88,] 0.406028666 0.1184250277 1.151354e-02
##   [89,] 0.243697761 0.4430868383 2.685375e-01
##   [90,] 0.406028666 0.1184250277 1.151354e-02
##   [91,] 0.169380014 0.0116813803 2.685375e-04
##   [92,] 0.349346279 0.3975319727 1.507880e-01
##   [93,] 0.424154946 0.3063341278 7.374710e-02
##   [94,] 0.211473264 0.4440938538 3.108657e-01
##   [95,] 0.443086838 0.2436977611 4.467792e-02
##   [96,] 0.433331375 0.1637029640 2.061445e-02
##   [97,] 0.433331375 0.1637029640 2.061445e-02
##   [98,] 0.211473264 0.4440938538 3.108657e-01
##   [99,] 0.444093854 0.2114732637 3.356718e-02
##  [100,] 0.321175019 0.4163379880 1.798991e-01
##  [101,] 0.259696720 0.4403553087 2.488965e-01
##  [102,] 0.148232688 0.4261689772 4.084119e-01
##  [103,] 0.433331375 0.1637029640 2.061445e-02
##  [104,] 0.306334128 0.4241549461 1.957638e-01
##  [105,] 0.416337988 0.3211750193 8.258786e-02
##  [106,] 0.243697761 0.4430868383 2.685375e-01
##  [107,] 0.386693968 0.3625255950 1.132892e-01
##  [108,] 0.407438488 0.3355375785 9.210835e-02
##  [109,] 0.407438488 0.3355375785 9.210835e-02
##  [110,] 0.291090430 0.4308138364 2.125348e-01
##  [111,] 0.349346279 0.3975319727 1.507880e-01
##  [112,] 0.375000000 0.3750000000 1.250000e-01
##  [113,] 0.426168977 0.1482326877 1.718640e-02
##  [114,] 0.321175019 0.4163379880 1.798991e-01
##  [115,] 0.443086838 0.2436977611 4.467792e-02
##  [116,] 0.362525595 0.3866939680 1.374912e-01
##  [117,] 0.444358195 0.2275981001 3.885821e-02
##  [118,] 0.335537578 0.4074384881 1.649156e-01
##  [119,] 0.362525595 0.3866939680 1.374912e-01
##  [120,] 0.386693968 0.3625255950 1.132892e-01
##  [121,] 0.397531973 0.3493462791 1.023338e-01
##  [122,] 0.444358195 0.2275981001 3.885821e-02
##  [123,] 0.424154946 0.3063341278 7.374710e-02
##  [124,] 0.442218287 0.1953987782 2.877966e-02
##  [125,] 0.335537578 0.4074384881 1.649156e-01
##  [126,] 0.293645732 0.0435030714 2.148300e-03
##  [127,] 0.392899701 0.1042386963 9.218388e-03
##  [128,] 0.243697761 0.4430868383 2.685375e-01
##  [129,] 0.377630828 0.0906313987 7.250512e-03
##  [130,] 0.318229499 0.0540389715 3.058810e-03
##  [131,] 0.443086838 0.2436977611 4.467792e-02
##  [132,] 0.291090430 0.4308138364 2.125348e-01
##  [133,] 0.433331375 0.1637029640 2.061445e-02
##  [134,] 0.360146521 0.0776786613 5.584740e-03
##  [135,] 0.266544426 0.0339238361 1.439193e-03
##  [136,] 0.443086838 0.2436977611 4.467792e-02
##  [137,] 0.318229499 0.0540389715 3.058810e-03
##  [138,] 0.375000000 0.3750000000 1.250000e-01
##  [139,] 0.169380014 0.0116813803 2.685375e-04
##  [140,] 0.375000000 0.3750000000 1.250000e-01
##  [141,] 0.266544426 0.0339238361 1.439193e-03
##  [142,] 0.360146521 0.0776786613 5.584740e-03
##  [143,] 0.442218287 0.1953987782 2.877966e-02
##  [144,] 0.433331375 0.1637029640 2.061445e-02
##  [145,] 0.243697761 0.4430868383 2.685375e-01
##  [146,] 0.444358195 0.2275981001 3.885821e-02
##  [147,] 0.440355309 0.2596967205 5.105149e-02
##  [148,] 0.442218287 0.1953987782 2.877966e-02
##  [149,] 0.179450170 0.4386559699 3.574234e-01
##  [150,] 0.318229499 0.0540389715 3.058810e-03
##  [151,] 0.442218287 0.1953987782 2.877966e-02
##  [152,] 0.275519452 0.4362391326 2.302373e-01
##  [153,] 0.438655970 0.1794501695 2.447048e-02
##  [154,] 0.204487093 0.0179374643 5.244873e-04
##  [155,] 0.407438488 0.3355375785 9.210835e-02
##  [156,] 0.293645732 0.0435030714 2.148300e-03
##  [157,] 0.430813836 0.2910904300 6.556091e-02
##  [158,] 0.438655970 0.1794501695 2.447048e-02
##  [159,] 0.306334128 0.4241549461 1.957638e-01
##  [160,] 0.443086838 0.2436977611 4.467792e-02
##  [161,] 0.426168977 0.1482326877 1.718640e-02
##  [162,] 0.430813836 0.2910904300 6.556091e-02
##  [163,] 0.227598100 0.4443581954 2.891855e-01
##  [164,] 0.211473264 0.4440938538 3.108657e-01
##  [165,] 0.375000000 0.3750000000 1.250000e-01
##  [166,] 0.416337988 0.3211750193 8.258786e-02
##  [167,] 0.426168977 0.1482326877 1.718640e-02
##  [168,] 0.169380014 0.0116813803 2.685375e-04
##  [169,] 0.443086838 0.2436977611 4.467792e-02
##  [170,] 0.440355309 0.2596967205 5.105149e-02
##  [171,] 0.438655970 0.1794501695 2.447048e-02
##  [172,] 0.397531973 0.3493462791 1.023338e-01
##  [173,] 0.433331375 0.1637029640 2.061445e-02
##  [174,] 0.443086838 0.2436977611 4.467792e-02
##  [175,] 0.259696720 0.4403553087 2.488965e-01
##  [176,] 0.033923836 0.2665444262 6.980925e-01
##  [177,] 0.360146521 0.0776786613 5.584740e-03
##  [178,] 0.377630828 0.0906313987 7.250512e-03
##  [179,] 0.360146521 0.0776786613 5.584740e-03
##  [180,] 0.438655970 0.1794501695 2.447048e-02
##  [181,] 0.444358195 0.2275981001 3.885821e-02
##  [182,] 0.386693968 0.3625255950 1.132892e-01
##  [183,] 0.416337988 0.3211750193 8.258786e-02
##  [184,] 0.362525595 0.3866939680 1.374912e-01
##  [185,] 0.243697761 0.4430868383 2.685375e-01
##  [186,] 0.386693968 0.3625255950 1.132892e-01
##  [187,] 0.440355309 0.2596967205 5.105149e-02
##  [188,] 0.318229499 0.0540389715 3.058810e-03
##  [189,] 0.424154946 0.3063341278 7.374710e-02
##  [190,] 0.406028666 0.1184250277 1.151354e-02
##  [191,] 0.407438488 0.3355375785 9.210835e-02
##  [192,] 0.169380014 0.0116813803 2.685375e-04
##  [193,] 0.321175019 0.4163379880 1.798991e-01
##  [194,] 0.426168977 0.1482326877 1.718640e-02
##  [195,] 0.444093854 0.2114732637 3.356718e-02
##  [196,] 0.266544426 0.0339238361 1.439193e-03
##  [197,] 0.360146521 0.0776786613 5.584740e-03
##  [198,] 0.340371253 0.0654560102 4.195898e-03
##  [199,] 0.291090430 0.4308138364 2.125348e-01
##  [200,] 0.275519452 0.4362391326 2.302373e-01
##  [201,] 0.195398778 0.4422182874 3.336033e-01
##  [202,] 0.397531973 0.3493462791 1.023338e-01
##  [203,] 0.335537578 0.4074384881 1.649156e-01
##  [204,] 0.417093250 0.1331148669 1.416116e-02
##  [205,] 0.243697761 0.4430868383 2.685375e-01
##  [206,] 0.318229499 0.0540389715 3.058810e-03
##  [207,] 0.335537578 0.4074384881 1.649156e-01
##  [208,] 0.416337988 0.3211750193 8.258786e-02
##  [209,] 0.169380014 0.0116813803 2.685375e-04
##  [210,] 0.266544426 0.0339238361 1.439193e-03
##  [211,] 0.438655970 0.1794501695 2.447048e-02
##  [212,] 0.392899701 0.1042386963 9.218388e-03
##  [213,] 0.335537578 0.4074384881 1.649156e-01
##  [214,] 0.407438488 0.3355375785 9.210835e-02
##  [215,] 0.416337988 0.3211750193 8.258786e-02
##  [216,] 0.443086838 0.2436977611 4.467792e-02
##  [217,] 0.436239133 0.2755194522 5.800410e-02
##  [218,] 0.440355309 0.2596967205 5.105149e-02
##  [219,] 0.266544426 0.0339238361 1.439193e-03
##  [220,] 0.236850055 0.0253767916 9.063140e-04
##  [221,] 0.349346279 0.3975319727 1.507880e-01
##  [222,] 0.440355309 0.2596967205 5.105149e-02
##  [223,] 0.377630828 0.0906313987 7.250512e-03
##  [224,] 0.291090430 0.4308138364 2.125348e-01
##  [225,] 0.204487093 0.0179374643 5.244873e-04
##  [226,] 0.211473264 0.4440938538 3.108657e-01
##  [227,] 0.443086838 0.2436977611 4.467792e-02
##  [228,] 0.000000000 0.0000000000 1.000000e+00
##  [229,] 0.443086838 0.2436977611 4.467792e-02
##  [230,] 0.433331375 0.1637029640 2.061445e-02
##  [231,] 0.291090430 0.4308138364 2.125348e-01
##  [232,] 0.236850055 0.0253767916 9.063140e-04
##  [233,] 0.444358195 0.2275981001 3.885821e-02
##  [234,] 0.377630828 0.0906313987 7.250512e-03
##  [235,] 0.090631399 0.3776308281 5.244873e-01
##  [236,] 0.306334128 0.4241549461 1.957638e-01
##  [237,] 0.318229499 0.0540389715 3.058810e-03
##  [238,] 0.426168977 0.1482326877 1.718640e-02
##  [239,] 0.321175019 0.4163379880 1.798991e-01
##  [240,] 0.227598100 0.4443581954 2.891855e-01
##  [241,] 0.416337988 0.3211750193 8.258786e-02
##  [242,] 0.430813836 0.2910904300 6.556091e-02
##  [243,] 0.377630828 0.0906313987 7.250512e-03
##  [244,] 0.436239133 0.2755194522 5.800410e-02
##  [245,] 0.204487093 0.0179374643 5.244873e-04
##  [246,] 0.243697761 0.4430868383 2.685375e-01
##  [247,] 0.417093250 0.1331148669 1.416116e-02
##  [248,] 0.275519452 0.4362391326 2.302373e-01
##  [249,] 0.442218287 0.1953987782 2.877966e-02
##  [250,] 0.417093250 0.1331148669 1.416116e-02
##  [251,] 0.362525595 0.3866939680 1.374912e-01
##  [252,] 0.430813836 0.2910904300 6.556091e-02
##  [253,] 0.321175019 0.4163379880 1.798991e-01
##  [254,] 0.442218287 0.1953987782 2.877966e-02
##  [255,] 0.090631399 0.0030210466 3.356718e-05
##  [256,] 0.293645732 0.0435030714 2.148300e-03
##  [257,] 0.360146521 0.0776786613 5.584740e-03
##  [258,] 0.259696720 0.4403553087 2.488965e-01
##  [259,] 0.397531973 0.3493462791 1.023338e-01
##  [260,] 0.444093854 0.2114732637 3.356718e-02
##  [261,] 0.204487093 0.0179374643 5.244873e-04
##  [262,] 0.392899701 0.1042386963 9.218388e-03
##  [263,] 0.430813836 0.2910904300 6.556091e-02
##  [264,] 0.417093250 0.1331148669 1.416116e-02
##  [265,] 0.386693968 0.3625255950 1.132892e-01
##  [266,] 0.377630828 0.0906313987 7.250512e-03
##  [267,] 0.424154946 0.3063341278 7.374710e-02
##  [268,] 0.444093854 0.2114732637 3.356718e-02
##  [269,] 0.397531973 0.3493462791 1.023338e-01
##  [270,] 0.340371253 0.0654560102 4.195898e-03
##  [271,] 0.204487093 0.0179374643 5.244873e-04
##  [272,] 0.318229499 0.0540389715 3.058810e-03
##  [273,] 0.417093250 0.1331148669 1.416116e-02
##  [274,] 0.375000000 0.3750000000 1.250000e-01
##  [275,] 0.318229499 0.0540389715 3.058810e-03
##  [276,] 0.386693968 0.3625255950 1.132892e-01
##  [277,] 0.444093854 0.2114732637 3.356718e-02
##  [278,] 0.243697761 0.4430868383 2.685375e-01
##  [279,] 0.407438488 0.3355375785 9.210835e-02
##  [280,] 0.321175019 0.4163379880 1.798991e-01
##  [281,] 0.436239133 0.2755194522 5.800410e-02
##  [282,] 0.443086838 0.2436977611 4.467792e-02
##  [283,] 0.433331375 0.1637029640 2.061445e-02
##  [284,] 0.362525595 0.3866939680 1.374912e-01
##  [285,] 0.426168977 0.1482326877 1.718640e-02
##  [286,] 0.386693968 0.3625255950 1.132892e-01
##  [287,] 0.375000000 0.3750000000 1.250000e-01
##  [288,] 0.440355309 0.2596967205 5.105149e-02
##  [289,] 0.243697761 0.4430868383 2.685375e-01
##  [290,] 0.362525595 0.3866939680 1.374912e-01
##  [291,] 0.444093854 0.2114732637 3.356718e-02
##  [292,] 0.377630828 0.0906313987 7.250512e-03
##  [293,] 0.424154946 0.3063341278 7.374710e-02
##  [294,] 0.243697761 0.4430868383 2.685375e-01
##  [295,] 0.416337988 0.3211750193 8.258786e-02
##  [296,] 0.424154946 0.3063341278 7.374710e-02
##  [297,] 0.416337988 0.3211750193 8.258786e-02
##  [298,] 0.349346279 0.3975319727 1.507880e-01
##  [299,] 0.195398778 0.4422182874 3.336033e-01
##  [300,] 0.416337988 0.3211750193 8.258786e-02
##  [301,] 0.426168977 0.1482326877 1.718640e-02
##  [302,] 0.417093250 0.1331148669 1.416116e-02
##  [303,] 0.362525595 0.3866939680 1.374912e-01
##  [304,] 0.362525595 0.3866939680 1.374912e-01
##  [305,] 0.444358195 0.2275981001 3.885821e-02
##  [306,] 0.293645732 0.0435030714 2.148300e-03
##  [307,] 0.375000000 0.3750000000 1.250000e-01
##  [308,] 0.392899701 0.1042386963 9.218388e-03
##  [309,] 0.275519452 0.4362391326 2.302373e-01
##  [310,] 0.195398778 0.4422182874 3.336033e-01
##  [311,] 0.275519452 0.4362391326 2.302373e-01
##  [312,] 0.349346279 0.3975319727 1.507880e-01
##  [313,] 0.436239133 0.2755194522 5.800410e-02
##  [314,] 0.416337988 0.3211750193 8.258786e-02
##  [315,] 0.386693968 0.3625255950 1.132892e-01
##  [316,] 0.417093250 0.1331148669 1.416116e-02
##  [317,] 0.392899701 0.1042386963 9.218388e-03
##  [318,] 0.386693968 0.3625255950 1.132892e-01
##  [319,] 0.433331375 0.1637029640 2.061445e-02
##  [320,] 0.397531973 0.3493462791 1.023338e-01
##  [321,] 0.416337988 0.3211750193 8.258786e-02
##  [322,] 0.407438488 0.3355375785 9.210835e-02
##  [323,] 0.397531973 0.3493462791 1.023338e-01
##  [324,] 0.375000000 0.3750000000 1.250000e-01
##  [325,] 0.438655970 0.1794501695 2.447048e-02
##  [326,] 0.349346279 0.3975319727 1.507880e-01
##  [327,] 0.407438488 0.3355375785 9.210835e-02
##  [328,] 0.430813836 0.2910904300 6.556091e-02
##  [329,] 0.424154946 0.3063341278 7.374710e-02
##  [330,] 0.195398778 0.4422182874 3.336033e-01
##  [331,] 0.442218287 0.1953987782 2.877966e-02
##  [332,] 0.444093854 0.2114732637 3.356718e-02
##  [333,] 0.440355309 0.2596967205 5.105149e-02
##  [334,] 0.377630828 0.0906313987 7.250512e-03
##  [335,] 0.349346279 0.3975319727 1.507880e-01
##  [336,] 0.433331375 0.1637029640 2.061445e-02
##  [337,] 0.318229499 0.0540389715 3.058810e-03
##  [338,] 0.349346279 0.3975319727 1.507880e-01
##  [339,] 0.440355309 0.2596967205 5.105149e-02
##  [340,] 0.163702964 0.4333313752 3.823512e-01
##  [341,] 0.340371253 0.0654560102 4.195898e-03
##  [342,] 0.362525595 0.3866939680 1.374912e-01
##  [343,] 0.440355309 0.2596967205 5.105149e-02
##  [344,] 0.204487093 0.0179374643 5.244873e-04
##  [345,] 0.416337988 0.3211750193 8.258786e-02
##  [346,] 0.163702964 0.4333313752 3.823512e-01
##  [347,] 0.227598100 0.4443581954 2.891855e-01
##  [348,] 0.377630828 0.0906313987 7.250512e-03
##  [349,] 0.416337988 0.3211750193 8.258786e-02
##  [350,] 0.335537578 0.4074384881 1.649156e-01
##  [351,] 0.306334128 0.4241549461 1.957638e-01
##  [352,] 0.377630828 0.0906313987 7.250512e-03
##  [353,] 0.397531973 0.3493462791 1.023338e-01
##  [354,] 0.397531973 0.3493462791 1.023338e-01
##  [355,] 0.444358195 0.2275981001 3.885821e-02
##  [356,] 0.362525595 0.3866939680 1.374912e-01
##  [357,] 0.397531973 0.3493462791 1.023338e-01
##  [358,] 0.416337988 0.3211750193 8.258786e-02
##  [359,] 0.424154946 0.3063341278 7.374710e-02
##  [360,] 0.436239133 0.2755194522 5.800410e-02
##  [361,] 0.275519452 0.4362391326 2.302373e-01
##  [362,] 0.362525595 0.3866939680 1.374912e-01
##  [363,] 0.321175019 0.4163379880 1.798991e-01
##  [364,] 0.444093854 0.2114732637 3.356718e-02
##  [365,] 0.275519452 0.4362391326 2.302373e-01
##  [366,] 0.362525595 0.3866939680 1.374912e-01
##  [367,] 0.375000000 0.3750000000 1.250000e-01
##  [368,] 0.436239133 0.2755194522 5.800410e-02
##  [369,] 0.362525595 0.3866939680 1.374912e-01
##  [370,] 0.321175019 0.4163379880 1.798991e-01
##  [371,] 0.340371253 0.0654560102 4.195898e-03
##  [372,] 0.416337988 0.3211750193 8.258786e-02
##  [373,] 0.236850055 0.0253767916 9.063140e-04
##  [374,] 0.266544426 0.0339238361 1.439193e-03
##  [375,] 0.397531973 0.3493462791 1.023338e-01
##  [376,] 0.444093854 0.2114732637 3.356718e-02
##  [377,] 0.417093250 0.1331148669 1.416116e-02
##  [378,] 0.444358195 0.2275981001 3.885821e-02
##  [379,] 0.407438488 0.3355375785 9.210835e-02
##  [380,] 0.195398778 0.4422182874 3.336033e-01
##  [381,] 0.406028666 0.1184250277 1.151354e-02
##  [382,] 0.195398778 0.4422182874 3.336033e-01
##  [383,] 0.416337988 0.3211750193 8.258786e-02
##  [384,] 0.243697761 0.4430868383 2.685375e-01
##  [385,] 0.266544426 0.0339238361 1.439193e-03
##  [386,] 0.426168977 0.1482326877 1.718640e-02
##  [387,] 0.424154946 0.3063341278 7.374710e-02
##  [388,] 0.148232688 0.4261689772 4.084119e-01
##  [389,] 0.306334128 0.4241549461 1.957638e-01
##  [390,] 0.436239133 0.2755194522 5.800410e-02
##  [391,] 0.392899701 0.1042386963 9.218388e-03
##  [392,] 0.266544426 0.0339238361 1.439193e-03
##  [393,] 0.349346279 0.3975319727 1.507880e-01
##  [394,] 0.340371253 0.0654560102 4.195898e-03
##  [395,] 0.321175019 0.4163379880 1.798991e-01
##  [396,] 0.407438488 0.3355375785 9.210835e-02
##  [397,] 0.444093854 0.2114732637 3.356718e-02
##  [398,] 0.444358195 0.2275981001 3.885821e-02
##  [399,] 0.442218287 0.1953987782 2.877966e-02
##  [400,] 0.227598100 0.4443581954 2.891855e-01
##  [401,] 0.417093250 0.1331148669 1.416116e-02
##  [402,] 0.204487093 0.0179374643 5.244873e-04
##  [403,] 0.442218287 0.1953987782 2.877966e-02
##  [404,] 0.318229499 0.0540389715 3.058810e-03
##  [405,] 0.397531973 0.3493462791 1.023338e-01
##  [406,] 0.335537578 0.4074384881 1.649156e-01
##  [407,] 0.442218287 0.1953987782 2.877966e-02
##  [408,] 0.426168977 0.1482326877 1.718640e-02
##  [409,] 0.349346279 0.3975319727 1.507880e-01
##  [410,] 0.362525595 0.3866939680 1.374912e-01
##  [411,] 0.306334128 0.4241549461 1.957638e-01
##  [412,] 0.362525595 0.3866939680 1.374912e-01
##  [413,] 0.406028666 0.1184250277 1.151354e-02
##  [414,] 0.442218287 0.1953987782 2.877966e-02
##  [415,] 0.046838810 0.0007678494 4.195898e-06
##  [416,] 0.406028666 0.1184250277 1.151354e-02
##  [417,] 0.436239133 0.2755194522 5.800410e-02
##  [418,] 0.430813836 0.2910904300 6.556091e-02
##  [419,] 0.424154946 0.3063341278 7.374710e-02
##  [420,] 0.443086838 0.2436977611 4.467792e-02
##  [421,] 0.430813836 0.2910904300 6.556091e-02
##  [422,] 0.406028666 0.1184250277 1.151354e-02
##  [423,] 0.195398778 0.4422182874 3.336033e-01
##  [424,] 0.397531973 0.3493462791 1.023338e-01
##  [425,] 0.291090430 0.4308138364 2.125348e-01
##  [426,] 0.335537578 0.4074384881 1.649156e-01
##  [427,] 0.318229499 0.0540389715 3.058810e-03
##  [428,] 0.169380014 0.0116813803 2.685375e-04
##  [429,] 0.436239133 0.2755194522 5.800410e-02
##  [430,] 0.392899701 0.1042386963 9.218388e-03
##  [431,] 0.227598100 0.4443581954 2.891855e-01
##  [432,] 0.438655970 0.1794501695 2.447048e-02
##  [433,] 0.406028666 0.1184250277 1.151354e-02
##  [434,] 0.406028666 0.1184250277 1.151354e-02
##  [435,] 0.266544426 0.0339238361 1.439193e-03
##  [436,] 0.430813836 0.2910904300 6.556091e-02
##  [437,] 0.424154946 0.3063341278 7.374710e-02
##  [438,] 0.259696720 0.4403553087 2.488965e-01
##  [439,] 0.440355309 0.2596967205 5.105149e-02
##  [440,] 0.444093854 0.2114732637 3.356718e-02
##  [441,] 0.243697761 0.4430868383 2.685375e-01
##  [442,] 0.227598100 0.4443581954 2.891855e-01
##  [443,] 0.444358195 0.2275981001 3.885821e-02
##  [444,] 0.424154946 0.3063341278 7.374710e-02
##  [445,] 0.065456010 0.3403712531 5.899768e-01
##  [446,] 0.318229499 0.0540389715 3.058810e-03
##  [447,] 0.397531973 0.3493462791 1.023338e-01
##  [448,] 0.360146521 0.0776786613 5.584740e-03
##  [449,] 0.436239133 0.2755194522 5.800410e-02
##  [450,] 0.349346279 0.3975319727 1.507880e-01
##  [451,] 0.444358195 0.2275981001 3.885821e-02
##  [452,] 0.204487093 0.0179374643 5.244873e-04
##  [453,] 0.392899701 0.1042386963 9.218388e-03
##  [454,] 0.227598100 0.4443581954 2.891855e-01
##  [455,] 0.436239133 0.2755194522 5.800410e-02
##  [456,] 0.433331375 0.1637029640 2.061445e-02
##  [457,] 0.444093854 0.2114732637 3.356718e-02
##  [458,] 0.416337988 0.3211750193 8.258786e-02
##  [459,] 0.243697761 0.4430868383 2.685375e-01
##  [460,] 0.293645732 0.0435030714 2.148300e-03
##  [461,] 0.377630828 0.0906313987 7.250512e-03
##  [462,] 0.306334128 0.4241549461 1.957638e-01
##  [463,] 0.335537578 0.4074384881 1.649156e-01
##  [464,] 0.033923836 0.2665444262 6.980925e-01
##  [465,] 0.133114867 0.4170932496 4.356307e-01
##  [466,] 0.321175019 0.4163379880 1.798991e-01
##  [467,] 0.335537578 0.4074384881 1.649156e-01
##  [468,] 0.259696720 0.4403553087 2.488965e-01
##  [469,] 0.406028666 0.1184250277 1.151354e-02
##  [470,] 0.349346279 0.3975319727 1.507880e-01
##  [471,] 0.430813836 0.2910904300 6.556091e-02
##  [472,] 0.362525595 0.3866939680 1.374912e-01
##  [473,] 0.321175019 0.4163379880 1.798991e-01
##  [474,] 0.306334128 0.4241549461 1.957638e-01
##  [475,] 0.443086838 0.2436977611 4.467792e-02
##  [476,] 0.377630828 0.0906313987 7.250512e-03
##  [477,] 0.416337988 0.3211750193 8.258786e-02
##  [478,] 0.291090430 0.4308138364 2.125348e-01
##  [479,] 0.416337988 0.3211750193 8.258786e-02
##  [480,] 0.424154946 0.3063341278 7.374710e-02
##  [481,] 0.442218287 0.1953987782 2.877966e-02
##  [482,] 0.440355309 0.2596967205 5.105149e-02
##  [483,] 0.335537578 0.4074384881 1.649156e-01
##  [484,] 0.291090430 0.4308138364 2.125348e-01
##  [485,] 0.430813836 0.2910904300 6.556091e-02
##  [486,] 0.318229499 0.0540389715 3.058810e-03
##  [487,] 0.430813836 0.2910904300 6.556091e-02
##  [488,] 0.407438488 0.3355375785 9.210835e-02
##  [489,] 0.386693968 0.3625255950 1.132892e-01
##  [490,] 0.360146521 0.0776786613 5.584740e-03
##  [491,] 0.236850055 0.0253767916 9.063140e-04
##  [492,] 0.362525595 0.3866939680 1.374912e-01
##  [493,] 0.236850055 0.0253767916 9.063140e-04
##  [494,] 0.436239133 0.2755194522 5.800410e-02
##  [495,] 0.375000000 0.3750000000 1.250000e-01
##  [496,] 0.443086838 0.2436977611 4.467792e-02
##  [497,] 0.440355309 0.2596967205 5.105149e-02
##  [498,] 0.426168977 0.1482326877 1.718640e-02
##  [499,] 0.236850055 0.0253767916 9.063140e-04
##  [500,] 0.424154946 0.3063341278 7.374710e-02
##  [501,] 0.266544426 0.0339238361 1.439193e-03
##  [502,] 0.443086838 0.2436977611 4.467792e-02
##  [503,] 0.266544426 0.0339238361 1.439193e-03
##  [504,] 0.424154946 0.3063341278 7.374710e-02
##  [505,] 0.243697761 0.4430868383 2.685375e-01
##  [506,] 0.335537578 0.4074384881 1.649156e-01
##  [507,] 0.211473264 0.4440938538 3.108657e-01
##  [508,] 0.349346279 0.3975319727 1.507880e-01
##  [509,] 0.416337988 0.3211750193 8.258786e-02
##  [510,] 0.430813836 0.2910904300 6.556091e-02
##  [511,] 0.416337988 0.3211750193 8.258786e-02
##  [512,] 0.443086838 0.2436977611 4.467792e-02
##  [513,] 0.349346279 0.3975319727 1.507880e-01
##  [514,] 0.335537578 0.4074384881 1.649156e-01
##  [515,] 0.392899701 0.1042386963 9.218388e-03
##  [516,] 0.443086838 0.2436977611 4.467792e-02
##  [517,] 0.293645732 0.0435030714 2.148300e-03
##  [518,] 0.375000000 0.3750000000 1.250000e-01
##  [519,] 0.444093854 0.2114732637 3.356718e-02
##  [520,] 0.362525595 0.3866939680 1.374912e-01
##  [521,] 0.360146521 0.0776786613 5.584740e-03
##  [522,] 0.417093250 0.1331148669 1.416116e-02
##  [523,] 0.179450170 0.4386559699 3.574234e-01
##  [524,] 0.416337988 0.3211750193 8.258786e-02
##  [525,] 0.275519452 0.4362391326 2.302373e-01
##  [526,] 0.243697761 0.4430868383 2.685375e-01
##  [527,] 0.444358195 0.2275981001 3.885821e-02
##  [528,] 0.375000000 0.3750000000 1.250000e-01
##  [529,] 0.236850055 0.0253767916 9.063140e-04
##  [530,] 0.243697761 0.4430868383 2.685375e-01
##  [531,] 0.397531973 0.3493462791 1.023338e-01
##  [532,] 0.440355309 0.2596967205 5.105149e-02
##  [533,] 0.054038972 0.3182294988 6.246727e-01
##  [534,] 0.397531973 0.3493462791 1.023338e-01
##  [535,] 0.444093854 0.2114732637 3.356718e-02
##  [536,] 0.392899701 0.1042386963 9.218388e-03
##  [537,] 0.275519452 0.4362391326 2.302373e-01
##  [538,] 0.424154946 0.3063341278 7.374710e-02
##  [539,] 0.417093250 0.1331148669 1.416116e-02
##  [540,] 0.392899701 0.1042386963 9.218388e-03
##  [541,] 0.291090430 0.4308138364 2.125348e-01
##  [542,] 0.386693968 0.3625255950 1.132892e-01
##  [543,] 0.291090430 0.4308138364 2.125348e-01
##  [544,] 0.407438488 0.3355375785 9.210835e-02
##  [545,] 0.386693968 0.3625255950 1.132892e-01
##  [546,] 0.204487093 0.0179374643 5.244873e-04
##  [547,] 0.211473264 0.4440938538 3.108657e-01
##  [548,] 0.426168977 0.1482326877 1.718640e-02
##  [549,] 0.416337988 0.3211750193 8.258786e-02
##  [550,] 0.340371253 0.0654560102 4.195898e-03
##  [551,] 0.417093250 0.1331148669 1.416116e-02
##  [552,] 0.243697761 0.4430868383 2.685375e-01
##  [553,] 0.397531973 0.3493462791 1.023338e-01
##  [554,] 0.236850055 0.0253767916 9.063140e-04
##  [555,] 0.275519452 0.4362391326 2.302373e-01
##  [556,] 0.275519452 0.4362391326 2.302373e-01
##  [557,] 0.204487093 0.0179374643 5.244873e-04
##  [558,] 0.416337988 0.3211750193 8.258786e-02
##  [559,] 0.243697761 0.4430868383 2.685375e-01
##  [560,] 0.377630828 0.0906313987 7.250512e-03
##  [561,] 0.386693968 0.3625255950 1.132892e-01
##  [562,] 0.442218287 0.1953987782 2.877966e-02
##  [563,] 0.375000000 0.3750000000 1.250000e-01
##  [564,] 0.392899701 0.1042386963 9.218388e-03
##  [565,] 0.335537578 0.4074384881 1.649156e-01
##  [566,] 0.065456010 0.3403712531 5.899768e-01
##  [567,] 0.426168977 0.1482326877 1.718640e-02
##  [568,] 0.444093854 0.2114732637 3.356718e-02
##  [569,] 0.340371253 0.0654560102 4.195898e-03
##  [570,] 0.444093854 0.2114732637 3.356718e-02
##  [571,] 0.444358195 0.2275981001 3.885821e-02
##  [572,] 0.335537578 0.4074384881 1.649156e-01
##  [573,] 0.426168977 0.1482326877 1.718640e-02
##  [574,] 0.417093250 0.1331148669 1.416116e-02
##  [575,] 0.243697761 0.4430868383 2.685375e-01
##  [576,] 0.444093854 0.2114732637 3.356718e-02
##  [577,] 0.444093854 0.2114732637 3.356718e-02
##  [578,] 0.392899701 0.1042386963 9.218388e-03
##  [579,] 0.321175019 0.4163379880 1.798991e-01
##  [580,] 0.131453291 0.0066840657 1.132892e-04
##  [581,] 0.444093854 0.2114732637 3.356718e-02
##  [582,] 0.340371253 0.0654560102 4.195898e-03
##  [583,] 0.406028666 0.1184250277 1.151354e-02
##  [584,] 0.340371253 0.0654560102 4.195898e-03
##  [585,] 0.436239133 0.2755194522 5.800410e-02
##  [586,] 0.340371253 0.0654560102 4.195898e-03
##  [587,] 0.386693968 0.3625255950 1.132892e-01
##  [588,] 0.291090430 0.4308138364 2.125348e-01
##  [589,] 0.442218287 0.1953987782 2.877966e-02
##  [590,] 0.090631399 0.3776308281 5.244873e-01
##  [591,] 0.133114867 0.4170932496 4.356307e-01
##  [592,] 0.442218287 0.1953987782 2.877966e-02
##  [593,] 0.417093250 0.1331148669 1.416116e-02
##  [594,] 0.046838810 0.0007678494 4.195898e-06
##  [595,] 0.362525595 0.3866939680 1.374912e-01
##  [596,] 0.443086838 0.2436977611 4.467792e-02
##  [597,] 0.118425028 0.4060286664 4.640328e-01
##  [598,] 0.433331375 0.1637029640 2.061445e-02
##  [599,] 0.417093250 0.1331148669 1.416116e-02
##  [600,] 0.424154946 0.3063341278 7.374710e-02
##  [601,] 0.397531973 0.3493462791 1.023338e-01
##  [602,] 0.291090430 0.4308138364 2.125348e-01
##  [603,] 0.417093250 0.1331148669 1.416116e-02
##  [604,] 0.275519452 0.4362391326 2.302373e-01
##  [605,] 0.397531973 0.3493462791 1.023338e-01
##  [606,] 0.416337988 0.3211750193 8.258786e-02
##  [607,] 0.424154946 0.3063341278 7.374710e-02
##  [608,] 0.266544426 0.0339238361 1.439193e-03
##  [609,] 0.416337988 0.3211750193 8.258786e-02
##  [610,] 0.275519452 0.4362391326 2.302373e-01
##  [611,] 0.397531973 0.3493462791 1.023338e-01
##  [612,] 0.444358195 0.2275981001 3.885821e-02
##  [613,] 0.386693968 0.3625255950 1.132892e-01
##  [614,] 0.436239133 0.2755194522 5.800410e-02
##  [615,] 0.291090430 0.4308138364 2.125348e-01
##  [616,] 0.195398778 0.4422182874 3.336033e-01
##  [617,] 0.444358195 0.2275981001 3.885821e-02
##  [618,] 0.377630828 0.0906313987 7.250512e-03
##  [619,] 0.375000000 0.3750000000 1.250000e-01
##  [620,] 0.417093250 0.1331148669 1.416116e-02
##  [621,] 0.392899701 0.1042386963 9.218388e-03
##  [622,] 0.291090430 0.4308138364 2.125348e-01
##  [623,] 0.438655970 0.1794501695 2.447048e-02
##  [624,] 0.417093250 0.1331148669 1.416116e-02
##  [625,] 0.386693968 0.3625255950 1.132892e-01
##  [626,] 0.211473264 0.4440938538 3.108657e-01
##  [627,] 0.340371253 0.0654560102 4.195898e-03
##  [628,] 0.360146521 0.0776786613 5.584740e-03
##  [629,] 0.406028666 0.1184250277 1.151354e-02
##  [630,] 0.417093250 0.1331148669 1.416116e-02
##  [631,] 0.443086838 0.2436977611 4.467792e-02
##  [632,] 0.436239133 0.2755194522 5.800410e-02
##  [633,] 0.444358195 0.2275981001 3.885821e-02
##  [634,] 0.424154946 0.3063341278 7.374710e-02
##  [635,] 0.430813836 0.2910904300 6.556091e-02
##  [636,] 0.424154946 0.3063341278 7.374710e-02
##  [637,] 0.360146521 0.0776786613 5.584740e-03
##  [638,] 0.397531973 0.3493462791 1.023338e-01
##  [639,] 0.407438488 0.3355375785 9.210835e-02
##  [640,] 0.335537578 0.4074384881 1.649156e-01
##  [641,] 0.444093854 0.2114732637 3.356718e-02
##  [642,] 0.436239133 0.2755194522 5.800410e-02
##  [643,] 0.275519452 0.4362391326 2.302373e-01
##  [644,] 0.360146521 0.0776786613 5.584740e-03
##  [645,] 0.417093250 0.1331148669 1.416116e-02
##  [646,] 0.417093250 0.1331148669 1.416116e-02
##  [647,] 0.440355309 0.2596967205 5.105149e-02
##  [648,] 0.424154946 0.3063341278 7.374710e-02
##  [649,] 0.416337988 0.3211750193 8.258786e-02
##  [650,] 0.243697761 0.4430868383 2.685375e-01
##  [651,] 0.360146521 0.0776786613 5.584740e-03
##  [652,] 0.436239133 0.2755194522 5.800410e-02
##  [653,] 0.397531973 0.3493462791 1.023338e-01
##  [654,] 0.377630828 0.0906313987 7.250512e-03
##  [655,] 0.444358195 0.2275981001 3.885821e-02
##  [656,] 0.375000000 0.3750000000 1.250000e-01
##  [657,] 0.424154946 0.3063341278 7.374710e-02
##  [658,] 0.306334128 0.4241549461 1.957638e-01
##  [659,] 0.436239133 0.2755194522 5.800410e-02
##  [660,] 0.444358195 0.2275981001 3.885821e-02
##  [661,] 0.377630828 0.0906313987 7.250512e-03
##  [662,] 0.417093250 0.1331148669 1.416116e-02
##  [663,] 0.444093854 0.2114732637 3.356718e-02
##  [664,] 0.335537578 0.4074384881 1.649156e-01
##  [665,] 0.306334128 0.4241549461 1.957638e-01
##  [666,] 0.179450170 0.4386559699 3.574234e-01
##  [667,] 0.259696720 0.4403553087 2.488965e-01
##  [668,] 0.406028666 0.1184250277 1.151354e-02
##  [669,] 0.443086838 0.2436977611 4.467792e-02
##  [670,] 0.375000000 0.3750000000 1.250000e-01
##  [671,] 0.306334128 0.4241549461 1.957638e-01
##  [672,] 0.386693968 0.3625255950 1.132892e-01
##  [673,] 0.407438488 0.3355375785 9.210835e-02
##  [674,] 0.377630828 0.0906313987 7.250512e-03
##  [675,] 0.318229499 0.0540389715 3.058810e-03
##  [676,] 0.291090430 0.4308138364 2.125348e-01
##  [677,] 0.406028666 0.1184250277 1.151354e-02
##  [678,] 0.375000000 0.3750000000 1.250000e-01
##  [679,] 0.362525595 0.3866939680 1.374912e-01
##  [680,] 0.362525595 0.3866939680 1.374912e-01
##  [681,] 0.424154946 0.3063341278 7.374710e-02
##  [682,] 0.259696720 0.4403553087 2.488965e-01
##  [683,] 0.043503071 0.2936457319 6.607029e-01
##  [684,] 0.204487093 0.0179374643 5.244873e-04
##  [685,] 0.392899701 0.1042386963 9.218388e-03
##  [686,] 0.407438488 0.3355375785 9.210835e-02
##  [687,] 0.291090430 0.4308138364 2.125348e-01
##  [688,] 0.424154946 0.3063341278 7.374710e-02
##  [689,] 0.424154946 0.3063341278 7.374710e-02
##  [690,] 0.406028666 0.1184250277 1.151354e-02
##  [691,] 0.211473264 0.4440938538 3.108657e-01
##  [692,] 0.386693968 0.3625255950 1.132892e-01
##  [693,] 0.306334128 0.4241549461 1.957638e-01
##  [694,] 0.360146521 0.0776786613 5.584740e-03
##  [695,] 0.433331375 0.1637029640 2.061445e-02
##  [696,] 0.266544426 0.0339238361 1.439193e-03
##  [697,] 0.349346279 0.3975319727 1.507880e-01
##  [698,] 0.417093250 0.1331148669 1.416116e-02
##  [699,] 0.227598100 0.4443581954 2.891855e-01
##  [700,] 0.179450170 0.4386559699 3.574234e-01
##  [701,] 0.340371253 0.0654560102 4.195898e-03
##  [702,] 0.335537578 0.4074384881 1.649156e-01
##  [703,] 0.360146521 0.0776786613 5.584740e-03
##  [704,] 0.426168977 0.1482326877 1.718640e-02
##  [705,] 0.266544426 0.0339238361 1.439193e-03
##  [706,] 0.118425028 0.4060286664 4.640328e-01
##  [707,] 0.430813836 0.2910904300 6.556091e-02
##  [708,] 0.416337988 0.3211750193 8.258786e-02
##  [709,] 0.433331375 0.1637029640 2.061445e-02
##  [710,] 0.375000000 0.3750000000 1.250000e-01
##  [711,] 0.211473264 0.4440938538 3.108657e-01
##  [712,] 0.291090430 0.4308138364 2.125348e-01
##  [713,] 0.406028666 0.1184250277 1.151354e-02
##  [714,] 0.321175019 0.4163379880 1.798991e-01
##  [715,] 0.259696720 0.4403553087 2.488965e-01
##  [716,] 0.349346279 0.3975319727 1.507880e-01
##  [717,] 0.275519452 0.4362391326 2.302373e-01
##  [718,] 0.377630828 0.0906313987 7.250512e-03
##  [719,] 0.131453291 0.0066840657 1.132892e-04
##  [720,] 0.211473264 0.4440938538 3.108657e-01
##  [721,] 0.211473264 0.4440938538 3.108657e-01
##  [722,] 0.386693968 0.3625255950 1.132892e-01
##  [723,] 0.444358195 0.2275981001 3.885821e-02
##  [724,] 0.406028666 0.1184250277 1.151354e-02
##  [725,] 0.349346279 0.3975319727 1.507880e-01
##  [726,] 0.424154946 0.3063341278 7.374710e-02
##  [727,] 0.407438488 0.3355375785 9.210835e-02
##  [728,] 0.236850055 0.0253767916 9.063140e-04
##  [729,] 0.442218287 0.1953987782 2.877966e-02
##  [730,] 0.043503071 0.2936457319 6.607029e-01
##  [731,] 0.362525595 0.3866939680 1.374912e-01
##  [732,] 0.318229499 0.0540389715 3.058810e-03
##  [733,] 0.440355309 0.2596967205 5.105149e-02
##  [734,] 0.090631399 0.0030210466 3.356718e-05
##  [735,] 0.375000000 0.3750000000 1.250000e-01
##  [736,] 0.266544426 0.0339238361 1.439193e-03
##  [737,] 0.321175019 0.4163379880 1.798991e-01
##  [738,] 0.416337988 0.3211750193 8.258786e-02
##  [739,] 0.406028666 0.1184250277 1.151354e-02
##  [740,] 0.397531973 0.3493462791 1.023338e-01
##  [741,] 0.293645732 0.0435030714 2.148300e-03
##  [742,] 0.392899701 0.1042386963 9.218388e-03
##  [743,] 0.406028666 0.1184250277 1.151354e-02
##  [744,] 0.362525595 0.3866939680 1.374912e-01
##  [745,] 0.375000000 0.3750000000 1.250000e-01
##  [746,] 0.266544426 0.0339238361 1.439193e-03
##  [747,] 0.211473264 0.4440938538 3.108657e-01
##  [748,] 0.179450170 0.4386559699 3.574234e-01
##  [749,] 0.163702964 0.4333313752 3.823512e-01
##  [750,] 0.360146521 0.0776786613 5.584740e-03
##  [751,] 0.349346279 0.3975319727 1.507880e-01
##  [752,] 0.340371253 0.0654560102 4.195898e-03
##  [753,] 0.438655970 0.1794501695 2.447048e-02
##  [754,] 0.340371253 0.0654560102 4.195898e-03
##  [755,] 0.444093854 0.2114732637 3.356718e-02
##  [756,] 0.433331375 0.1637029640 2.061445e-02
##  [757,] 0.407438488 0.3355375785 9.210835e-02
##  [758,] 0.442218287 0.1953987782 2.877966e-02
##  [759,] 0.227598100 0.4443581954 2.891855e-01
##  [760,] 0.349346279 0.3975319727 1.507880e-01
##  [761,] 0.293645732 0.0435030714 2.148300e-03
##  [762,] 0.406028666 0.1184250277 1.151354e-02
##  [763,] 0.204487093 0.0179374643 5.244873e-04
##  [764,] 0.362525595 0.3866939680 1.374912e-01
##  [765,] 0.266544426 0.0339238361 1.439193e-03
##  [766,] 0.430813836 0.2910904300 6.556091e-02
##  [767,] 0.438655970 0.1794501695 2.447048e-02
##  [768,] 0.362525595 0.3866939680 1.374912e-01
##  [769,] 0.426168977 0.1482326877 1.718640e-02
##  [770,] 0.426168977 0.1482326877 1.718640e-02
##  [771,] 0.444358195 0.2275981001 3.885821e-02
##  [772,] 0.443086838 0.2436977611 4.467792e-02
##  [773,] 0.406028666 0.1184250277 1.151354e-02
##  [774,] 0.163702964 0.4333313752 3.823512e-01
##  [775,] 0.104238696 0.3928997013 4.936432e-01
##  [776,] 0.444358195 0.2275981001 3.885821e-02
##  [777,] 0.392899701 0.1042386963 9.218388e-03
##  [778,] 0.195398778 0.4422182874 3.336033e-01
##  [779,] 0.131453291 0.0066840657 1.132892e-04
##  [780,] 0.321175019 0.4163379880 1.798991e-01
##  [781,] 0.436239133 0.2755194522 5.800410e-02
##  [782,] 0.306334128 0.4241549461 1.957638e-01
##  [783,] 0.438655970 0.1794501695 2.447048e-02
##  [784,] 0.211473264 0.4440938538 3.108657e-01
##  [785,] 0.436239133 0.2755194522 5.800410e-02
##  [786,] 0.440355309 0.2596967205 5.105149e-02
##  [787,] 0.426168977 0.1482326877 1.718640e-02
##  [788,] 0.169380014 0.0116813803 2.685375e-04
##  [789,] 0.397531973 0.3493462791 1.023338e-01
##  [790,] 0.227598100 0.4443581954 2.891855e-01
##  [791,] 0.360146521 0.0776786613 5.584740e-03
##  [792,] 0.406028666 0.1184250277 1.151354e-02
##  [793,] 0.375000000 0.3750000000 1.250000e-01
##  [794,] 0.417093250 0.1331148669 1.416116e-02
##  [795,] 0.349346279 0.3975319727 1.507880e-01
##  [796,] 0.442218287 0.1953987782 2.877966e-02
##  [797,] 0.163702964 0.4333313752 3.823512e-01
##  [798,] 0.443086838 0.2436977611 4.467792e-02
##  [799,] 0.416337988 0.3211750193 8.258786e-02
##  [800,] 0.133114867 0.4170932496 4.356307e-01
##  [801,] 0.362525595 0.3866939680 1.374912e-01
##  [802,] 0.386693968 0.3625255950 1.132892e-01
##  [803,] 0.377630828 0.0906313987 7.250512e-03
##  [804,] 0.442218287 0.1953987782 2.877966e-02
##  [805,] 0.349346279 0.3975319727 1.507880e-01
##  [806,] 0.291090430 0.4308138364 2.125348e-01
##  [807,] 0.417093250 0.1331148669 1.416116e-02
##  [808,] 0.426168977 0.1482326877 1.718640e-02
##  [809,] 0.375000000 0.3750000000 1.250000e-01
##  [810,] 0.179450170 0.4386559699 3.574234e-01
##  [811,] 0.392899701 0.1042386963 9.218388e-03
##  [812,] 0.430813836 0.2910904300 6.556091e-02
##  [813,] 0.430813836 0.2910904300 6.556091e-02
##  [814,] 0.386693968 0.3625255950 1.132892e-01
##  [815,] 0.386693968 0.3625255950 1.132892e-01
##  [816,] 0.360146521 0.0776786613 5.584740e-03
##  [817,] 0.335537578 0.4074384881 1.649156e-01
##  [818,] 0.443086838 0.2436977611 4.467792e-02
##  [819,] 0.306334128 0.4241549461 1.957638e-01
##  [820,] 0.444093854 0.2114732637 3.356718e-02
##  [821,] 0.340371253 0.0654560102 4.195898e-03
##  [822,] 0.417093250 0.1331148669 1.416116e-02
##  [823,] 0.424154946 0.3063341278 7.374710e-02
##  [824,] 0.440355309 0.2596967205 5.105149e-02
##  [825,] 0.392899701 0.1042386963 9.218388e-03
##  [826,] 0.236850055 0.0253767916 9.063140e-04
##  [827,] 0.426168977 0.1482326877 1.718640e-02
##  [828,] 0.340371253 0.0654560102 4.195898e-03
##  [829,] 0.377630828 0.0906313987 7.250512e-03
##  [830,] 0.416337988 0.3211750193 8.258786e-02
##  [831,] 0.433331375 0.1637029640 2.061445e-02
##  [832,] 0.397531973 0.3493462791 1.023338e-01
##  [833,] 0.054038972 0.3182294988 6.246727e-01
##  [834,] 0.444358195 0.2275981001 3.885821e-02
##  [835,] 0.440355309 0.2596967205 5.105149e-02
##  [836,] 0.090631399 0.0030210466 3.356718e-05
##  [837,] 0.426168977 0.1482326877 1.718640e-02
##  [838,] 0.293645732 0.0435030714 2.148300e-03
##  [839,] 0.349346279 0.3975319727 1.507880e-01
##  [840,] 0.266544426 0.0339238361 1.439193e-03
##  [841,] 0.442218287 0.1953987782 2.877966e-02
##  [842,] 0.291090430 0.4308138364 2.125348e-01
##  [843,] 0.444358195 0.2275981001 3.885821e-02
##  [844,] 0.407438488 0.3355375785 9.210835e-02
##  [845,] 0.386693968 0.3625255950 1.132892e-01
##  [846,] 0.306334128 0.4241549461 1.957638e-01
##  [847,] 0.386693968 0.3625255950 1.132892e-01
##  [848,] 0.397531973 0.3493462791 1.023338e-01
##  [849,] 0.090631399 0.0030210466 3.356718e-05
##  [850,] 0.442218287 0.1953987782 2.877966e-02
##  [851,] 0.407438488 0.3355375785 9.210835e-02
##  [852,] 0.306334128 0.4241549461 1.957638e-01
##  [853,] 0.349346279 0.3975319727 1.507880e-01
##  [854,] 0.406028666 0.1184250277 1.151354e-02
##  [855,] 0.433331375 0.1637029640 2.061445e-02
##  [856,] 0.179450170 0.4386559699 3.574234e-01
##  [857,] 0.397531973 0.3493462791 1.023338e-01
##  [858,] 0.340371253 0.0654560102 4.195898e-03
##  [859,] 0.195398778 0.4422182874 3.336033e-01
##  [860,] 0.293645732 0.0435030714 2.148300e-03
##  [861,] 0.436239133 0.2755194522 5.800410e-02
##  [862,] 0.392899701 0.1042386963 9.218388e-03
##  [863,] 0.424154946 0.3063341278 7.374710e-02
##  [864,] 0.407438488 0.3355375785 9.210835e-02
##  [865,] 0.306334128 0.4241549461 1.957638e-01
##  [866,] 0.443086838 0.2436977611 4.467792e-02
##  [867,] 0.444093854 0.2114732637 3.356718e-02
##  [868,] 0.430813836 0.2910904300 6.556091e-02
##  [869,] 0.377630828 0.0906313987 7.250512e-03
##  [870,] 0.243697761 0.4430868383 2.685375e-01
##  [871,] 0.416337988 0.3211750193 8.258786e-02
##  [872,] 0.397531973 0.3493462791 1.023338e-01
##  [873,] 0.397531973 0.3493462791 1.023338e-01
##  [874,] 0.227598100 0.4443581954 2.891855e-01
##  [875,] 0.443086838 0.2436977611 4.467792e-02
##  [876,] 0.436239133 0.2755194522 5.800410e-02
##  [877,] 0.360146521 0.0776786613 5.584740e-03
##  [878,] 0.243697761 0.4430868383 2.685375e-01
##  [879,] 0.433331375 0.1637029640 2.061445e-02
##  [880,] 0.386693968 0.3625255950 1.132892e-01
##  [881,] 0.318229499 0.0540389715 3.058810e-03
##  [882,] 0.443086838 0.2436977611 4.467792e-02
##  [883,] 0.426168977 0.1482326877 1.718640e-02
##  [884,] 0.090631399 0.0030210466 3.356718e-05
##  [885,] 0.362525595 0.3866939680 1.374912e-01
##  [886,] 0.436239133 0.2755194522 5.800410e-02
##  [887,] 0.416337988 0.3211750193 8.258786e-02
##  [888,] 0.227598100 0.4443581954 2.891855e-01
##  [889,] 0.104238696 0.3928997013 4.936432e-01
##  [890,] 0.293645732 0.0435030714 2.148300e-03
##  [891,] 0.426168977 0.1482326877 1.718640e-02
##  [892,] 0.424154946 0.3063341278 7.374710e-02
##  [893,] 0.321175019 0.4163379880 1.798991e-01
##  [894,] 0.306334128 0.4241549461 1.957638e-01
##  [895,] 0.291090430 0.4308138364 2.125348e-01
##  [896,] 0.377630828 0.0906313987 7.250512e-03
##  [897,] 0.386693968 0.3625255950 1.132892e-01
##  [898,] 0.386693968 0.3625255950 1.132892e-01
##  [899,] 0.377630828 0.0906313987 7.250512e-03
##  [900,] 0.266544426 0.0339238361 1.439193e-03
##  [901,] 0.227598100 0.4443581954 2.891855e-01
##  [902,] 0.444093854 0.2114732637 3.356718e-02
##  [903,] 0.443086838 0.2436977611 4.467792e-02
##  [904,] 0.438655970 0.1794501695 2.447048e-02
##  [905,] 0.340371253 0.0654560102 4.195898e-03
##  [906,] 0.426168977 0.1482326877 1.718640e-02
##  [907,] 0.444358195 0.2275981001 3.885821e-02
##  [908,] 0.340371253 0.0654560102 4.195898e-03
##  [909,] 0.318229499 0.0540389715 3.058810e-03
##  [910,] 0.426168977 0.1482326877 1.718640e-02
##  [911,] 0.444093854 0.2114732637 3.356718e-02
##  [912,] 0.349346279 0.3975319727 1.507880e-01
##  [913,] 0.436239133 0.2755194522 5.800410e-02
##  [914,] 0.406028666 0.1184250277 1.151354e-02
##  [915,] 0.318229499 0.0540389715 3.058810e-03
##  [916,] 0.349346279 0.3975319727 1.507880e-01
##  [917,] 0.266544426 0.0339238361 1.439193e-03
##  [918,] 0.211473264 0.4440938538 3.108657e-01
##  [919,] 0.179450170 0.4386559699 3.574234e-01
##  [920,] 0.321175019 0.4163379880 1.798991e-01
##  [921,] 0.444358195 0.2275981001 3.885821e-02
##  [922,] 0.204487093 0.0179374643 5.244873e-04
##  [923,] 0.397531973 0.3493462791 1.023338e-01
##  [924,] 0.406028666 0.1184250277 1.151354e-02
##  [925,] 0.259696720 0.4403553087 2.488965e-01
##  [926,] 0.243697761 0.4430868383 2.685375e-01
##  [927,] 0.397531973 0.3493462791 1.023338e-01
##  [928,] 0.440355309 0.2596967205 5.105149e-02
##  [929,] 0.318229499 0.0540389715 3.058810e-03
##  [930,] 0.046838810 0.0007678494 4.195898e-06
##  [931,] 0.424154946 0.3063341278 7.374710e-02
##  [932,] 0.406028666 0.1184250277 1.151354e-02
##  [933,] 0.392899701 0.1042386963 9.218388e-03
##  [934,] 0.362525595 0.3866939680 1.374912e-01
##  [935,] 0.335537578 0.4074384881 1.649156e-01
##  [936,] 0.417093250 0.1331148669 1.416116e-02
##  [937,] 0.360146521 0.0776786613 5.584740e-03
##  [938,] 0.426168977 0.1482326877 1.718640e-02
##  [939,] 0.169380014 0.0116813803 2.685375e-04
##  [940,] 0.436239133 0.2755194522 5.800410e-02
##  [941,] 0.424154946 0.3063341278 7.374710e-02
##  [942,] 0.416337988 0.3211750193 8.258786e-02
##  [943,] 0.407438488 0.3355375785 9.210835e-02
##  [944,] 0.227598100 0.4443581954 2.891855e-01
##  [945,] 0.335537578 0.4074384881 1.649156e-01
##  [946,] 0.416337988 0.3211750193 8.258786e-02
##  [947,] 0.321175019 0.4163379880 1.798991e-01
##  [948,] 0.340371253 0.0654560102 4.195898e-03
##  [949,] 0.335537578 0.4074384881 1.649156e-01
##  [950,] 0.440355309 0.2596967205 5.105149e-02
##  [951,] 0.424154946 0.3063341278 7.374710e-02
##  [952,] 0.386693968 0.3625255950 1.132892e-01
##  [953,] 0.397531973 0.3493462791 1.023338e-01
##  [954,] 0.392899701 0.1042386963 9.218388e-03
##  [955,] 0.340371253 0.0654560102 4.195898e-03
##  [956,] 0.416337988 0.3211750193 8.258786e-02
##  [957,] 0.275519452 0.4362391326 2.302373e-01
##  [958,] 0.397531973 0.3493462791 1.023338e-01
##  [959,] 0.440355309 0.2596967205 5.105149e-02
##  [960,] 0.375000000 0.3750000000 1.250000e-01
##  [961,] 0.386693968 0.3625255950 1.132892e-01
##  [962,] 0.259696720 0.4403553087 2.488965e-01
##  [963,] 0.416337988 0.3211750193 8.258786e-02
##  [964,] 0.335537578 0.4074384881 1.649156e-01
##  [965,] 0.349346279 0.3975319727 1.507880e-01
##  [966,] 0.407438488 0.3355375785 9.210835e-02
##  [967,] 0.416337988 0.3211750193 8.258786e-02
##  [968,] 0.443086838 0.2436977611 4.467792e-02
##  [969,] 0.386693968 0.3625255950 1.132892e-01
##  [970,] 0.397531973 0.3493462791 1.023338e-01
##  [971,] 0.416337988 0.3211750193 8.258786e-02
##  [972,] 0.375000000 0.3750000000 1.250000e-01
##  [973,] 0.259696720 0.4403553087 2.488965e-01
##  [974,] 0.006684066 0.1314532913 8.617494e-01
##  [975,] 0.386693968 0.3625255950 1.132892e-01
##  [976,] 0.275519452 0.4362391326 2.302373e-01
##  [977,] 0.444358195 0.2275981001 3.885821e-02
##  [978,] 0.424154946 0.3063341278 7.374710e-02
##  [979,] 0.375000000 0.3750000000 1.250000e-01
##  [980,] 0.243697761 0.4430868383 2.685375e-01
##  [981,] 0.407438488 0.3355375785 9.210835e-02
##  [982,] 0.293645732 0.0435030714 2.148300e-03
##  [983,] 0.195398778 0.4422182874 3.336033e-01
##  [984,] 0.179450170 0.4386559699 3.574234e-01
##  [985,] 0.397531973 0.3493462791 1.023338e-01
##  [986,] 0.443086838 0.2436977611 4.467792e-02
##  [987,] 0.433331375 0.1637029640 2.061445e-02
##  [988,] 0.195398778 0.4422182874 3.336033e-01
##  [989,] 0.416337988 0.3211750193 8.258786e-02
##  [990,] 0.318229499 0.0540389715 3.058810e-03
##  [991,] 0.360146521 0.0776786613 5.584740e-03
##  [992,] 0.362525595 0.3866939680 1.374912e-01
##  [993,] 0.266544426 0.0339238361 1.439193e-03
##  [994,] 0.440355309 0.2596967205 5.105149e-02
##  [995,] 0.444093854 0.2114732637 3.356718e-02
##  [996,] 0.438655970 0.1794501695 2.447048e-02
##  [997,] 0.204487093 0.0179374643 5.244873e-04
##  [998,] 0.340371253 0.0654560102 4.195898e-03
##  [999,] 0.436239133 0.2755194522 5.800410e-02
## [1000,] 0.442218287 0.1953987782 2.877966e-02
## [1001,] 0.243697761 0.4430868383 2.685375e-01
## [1002,] 0.148232688 0.4261689772 4.084119e-01
## [1003,] 0.416337988 0.3211750193 8.258786e-02
## [1004,] 0.443086838 0.2436977611 4.467792e-02
## [1005,] 0.291090430 0.4308138364 2.125348e-01
## [1006,] 0.407438488 0.3355375785 9.210835e-02
## [1007,] 0.291090430 0.4308138364 2.125348e-01
## [1008,] 0.321175019 0.4163379880 1.798991e-01
## [1009,] 0.417093250 0.1331148669 1.416116e-02
## [1010,] 0.306334128 0.4241549461 1.957638e-01
## [1011,] 0.406028666 0.1184250277 1.151354e-02
## [1012,] 0.306334128 0.4241549461 1.957638e-01
## [1013,] 0.444093854 0.2114732637 3.356718e-02
## [1014,] 0.392899701 0.1042386963 9.218388e-03
## [1015,] 0.440355309 0.2596967205 5.105149e-02
## [1016,] 0.416337988 0.3211750193 8.258786e-02
## [1017,] 0.375000000 0.3750000000 1.250000e-01
## [1018,] 0.362525595 0.3866939680 1.374912e-01
## [1019,] 0.443086838 0.2436977611 4.467792e-02
## [1020,] 0.360146521 0.0776786613 5.584740e-03
## [1021,] 0.406028666 0.1184250277 1.151354e-02
## [1022,] 0.349346279 0.3975319727 1.507880e-01
## [1023,] 0.436239133 0.2755194522 5.800410e-02
## [1024,] 0.227598100 0.4443581954 2.891855e-01
## [1025,] 0.392899701 0.1042386963 9.218388e-03
## [1026,] 0.360146521 0.0776786613 5.584740e-03
## [1027,] 0.293645732 0.0435030714 2.148300e-03
## [1028,] 0.362525595 0.3866939680 1.374912e-01
## [1029,] 0.179450170 0.4386559699 3.574234e-01
## [1030,] 0.433331375 0.1637029640 2.061445e-02
## [1031,] 0.169380014 0.0116813803 2.685375e-04
## [1032,] 0.291090430 0.4308138364 2.125348e-01
## [1033,] 0.163702964 0.4333313752 3.823512e-01
## [1034,] 0.430813836 0.2910904300 6.556091e-02
## [1035,] 0.375000000 0.3750000000 1.250000e-01
## [1036,] 0.438655970 0.1794501695 2.447048e-02
## [1037,] 0.293645732 0.0435030714 2.148300e-03
## [1038,] 0.407438488 0.3355375785 9.210835e-02
## [1039,] 0.169380014 0.0116813803 2.685375e-04
## [1040,] 0.163702964 0.4333313752 3.823512e-01
## [1041,] 0.424154946 0.3063341278 7.374710e-02
## [1042,] 0.349346279 0.3975319727 1.507880e-01
## [1043,] 0.407438488 0.3355375785 9.210835e-02
## [1044,] 0.430813836 0.2910904300 6.556091e-02
## [1045,] 0.443086838 0.2436977611 4.467792e-02
## [1046,] 0.440355309 0.2596967205 5.105149e-02
## [1047,] 0.349346279 0.3975319727 1.507880e-01
## [1048,] 0.426168977 0.1482326877 1.718640e-02
## [1049,] 0.416337988 0.3211750193 8.258786e-02
## [1050,] 0.433331375 0.1637029640 2.061445e-02
## [1051,] 0.417093250 0.1331148669 1.416116e-02
## [1052,] 0.407438488 0.3355375785 9.210835e-02
## [1053,] 0.424154946 0.3063341278 7.374710e-02
## [1054,] 0.362525595 0.3866939680 1.374912e-01
## [1055,] 0.291090430 0.4308138364 2.125348e-01
## [1056,] 0.375000000 0.3750000000 1.250000e-01
## [1057,] 0.397531973 0.3493462791 1.023338e-01
## [1058,] 0.443086838 0.2436977611 4.467792e-02
## [1059,] 0.131453291 0.0066840657 1.132892e-04
## [1060,] 0.211473264 0.4440938538 3.108657e-01
## [1061,] 0.275519452 0.4362391326 2.302373e-01
## [1062,] 0.195398778 0.4422182874 3.336033e-01
## [1063,] 0.424154946 0.3063341278 7.374710e-02
## [1064,] 0.430813836 0.2910904300 6.556091e-02
## [1065,] 0.360146521 0.0776786613 5.584740e-03
## [1066,] 0.444093854 0.2114732637 3.356718e-02
## [1067,] 0.293645732 0.0435030714 2.148300e-03
## [1068,] 0.340371253 0.0654560102 4.195898e-03
## [1069,] 0.416337988 0.3211750193 8.258786e-02
## [1070,] 0.444358195 0.2275981001 3.885821e-02
## [1071,] 0.417093250 0.1331148669 1.416116e-02
## [1072,] 0.424154946 0.3063341278 7.374710e-02
## [1073,] 0.386693968 0.3625255950 1.132892e-01
## [1074,] 0.416337988 0.3211750193 8.258786e-02
## [1075,] 0.275519452 0.4362391326 2.302373e-01
## [1076,] 0.443086838 0.2436977611 4.467792e-02
## [1077,] 0.054038972 0.3182294988 6.246727e-01
## [1078,] 0.377630828 0.0906313987 7.250512e-03
## [1079,] 0.416337988 0.3211750193 8.258786e-02
## [1080,] 0.440355309 0.2596967205 5.105149e-02
## [1081,] 0.443086838 0.2436977611 4.467792e-02
## [1082,] 0.227598100 0.4443581954 2.891855e-01
## [1083,] 0.444093854 0.2114732637 3.356718e-02
## [1084,] 0.293645732 0.0435030714 2.148300e-03
## [1085,] 0.321175019 0.4163379880 1.798991e-01
## [1086,] 0.407438488 0.3355375785 9.210835e-02
## [1087,] 0.436239133 0.2755194522 5.800410e-02
## [1088,] 0.377630828 0.0906313987 7.250512e-03
## [1089,] 0.426168977 0.1482326877 1.718640e-02
## [1090,] 0.335537578 0.4074384881 1.649156e-01
## [1091,] 0.335537578 0.4074384881 1.649156e-01
## [1092,] 0.306334128 0.4241549461 1.957638e-01
## [1093,] 0.397531973 0.3493462791 1.023338e-01
## [1094,] 0.131453291 0.0066840657 1.132892e-04
## [1095,] 0.043503071 0.2936457319 6.607029e-01
## [1096,] 0.444093854 0.2114732637 3.356718e-02
## [1097,] 0.321175019 0.4163379880 1.798991e-01
## [1098,] 0.433331375 0.1637029640 2.061445e-02
## [1099,] 0.211473264 0.4440938538 3.108657e-01
## [1100,] 0.444358195 0.2275981001 3.885821e-02
## [1101,] 0.195398778 0.4422182874 3.336033e-01
## [1102,] 0.148232688 0.4261689772 4.084119e-01
## [1103,] 0.407438488 0.3355375785 9.210835e-02
## [1104,] 0.266544426 0.0339238361 1.439193e-03
## [1105,] 0.000000000 0.0000000000 1.000000e+00
## [1106,] 0.349346279 0.3975319727 1.507880e-01
## [1107,] 0.243697761 0.4430868383 2.685375e-01
## [1108,] 0.335537578 0.4074384881 1.649156e-01
## [1109,] 0.416337988 0.3211750193 8.258786e-02
## [1110,] 0.392899701 0.1042386963 9.218388e-03
## [1111,] 0.375000000 0.3750000000 1.250000e-01
## [1112,] 0.397531973 0.3493462791 1.023338e-01
## [1113,] 0.444358195 0.2275981001 3.885821e-02
## [1114,] 0.321175019 0.4163379880 1.798991e-01
## [1115,] 0.442218287 0.1953987782 2.877966e-02
## [1116,] 0.335537578 0.4074384881 1.649156e-01
## [1117,] 0.444358195 0.2275981001 3.885821e-02
## [1118,] 0.163702964 0.4333313752 3.823512e-01
## [1119,] 0.204487093 0.0179374643 5.244873e-04
## [1120,] 0.179450170 0.4386559699 3.574234e-01
## [1121,] 0.430813836 0.2910904300 6.556091e-02
## [1122,] 0.426168977 0.1482326877 1.718640e-02
## [1123,] 0.444093854 0.2114732637 3.356718e-02
## [1124,] 0.266544426 0.0339238361 1.439193e-03
## [1125,] 0.377630828 0.0906313987 7.250512e-03
## [1126,] 0.417093250 0.1331148669 1.416116e-02
## [1127,] 0.360146521 0.0776786613 5.584740e-03
## [1128,] 0.406028666 0.1184250277 1.151354e-02
## [1129,] 0.306334128 0.4241549461 1.957638e-01
## [1130,] 0.236850055 0.0253767916 9.063140e-04
## [1131,] 0.377630828 0.0906313987 7.250512e-03
## [1132,] 0.397531973 0.3493462791 1.023338e-01
## [1133,] 0.424154946 0.3063341278 7.374710e-02
## [1134,] 0.440355309 0.2596967205 5.105149e-02
## [1135,] 0.306334128 0.4241549461 1.957638e-01
## [1136,] 0.266544426 0.0339238361 1.439193e-03
## [1137,] 0.375000000 0.3750000000 1.250000e-01
## [1138,] 0.433331375 0.1637029640 2.061445e-02
## [1139,] 0.118425028 0.4060286664 4.640328e-01
## [1140,] 0.259696720 0.4403553087 2.488965e-01
## [1141,] 0.397531973 0.3493462791 1.023338e-01
## [1142,] 0.275519452 0.4362391326 2.302373e-01
## [1143,] 0.426168977 0.1482326877 1.718640e-02
## [1144,] 0.204487093 0.0179374643 5.244873e-04
## [1145,] 0.430813836 0.2910904300 6.556091e-02
## [1146,] 0.438655970 0.1794501695 2.447048e-02
## [1147,] 0.169380014 0.0116813803 2.685375e-04
## [1148,] 0.362525595 0.3866939680 1.374912e-01
## [1149,] 0.243697761 0.4430868383 2.685375e-01
## [1150,] 0.424154946 0.3063341278 7.374710e-02
## [1151,] 0.362525595 0.3866939680 1.374912e-01
## [1152,] 0.291090430 0.4308138364 2.125348e-01
## [1153,] 0.406028666 0.1184250277 1.151354e-02
## [1154,] 0.362525595 0.3866939680 1.374912e-01
## [1155,] 0.236850055 0.0253767916 9.063140e-04
## [1156,] 0.321175019 0.4163379880 1.798991e-01
## [1157,] 0.266544426 0.0339238361 1.439193e-03
## [1158,] 0.259696720 0.4403553087 2.488965e-01
## [1159,] 0.430813836 0.2910904300 6.556091e-02
## [1160,] 0.443086838 0.2436977611 4.467792e-02
## [1161,] 0.444358195 0.2275981001 3.885821e-02
## [1162,] 0.406028666 0.1184250277 1.151354e-02
## [1163,] 0.386693968 0.3625255950 1.132892e-01
## [1164,] 0.433331375 0.1637029640 2.061445e-02
## [1165,] 0.335537578 0.4074384881 1.649156e-01
## [1166,] 0.362525595 0.3866939680 1.374912e-01
## [1167,] 0.433331375 0.1637029640 2.061445e-02
## [1168,] 0.318229499 0.0540389715 3.058810e-03
## [1169,] 0.259696720 0.4403553087 2.488965e-01
## [1170,] 0.386693968 0.3625255950 1.132892e-01
## [1171,] 0.440355309 0.2596967205 5.105149e-02
## [1172,] 0.227598100 0.4443581954 2.891855e-01
## [1173,] 0.291090430 0.4308138364 2.125348e-01
## [1174,] 0.426168977 0.1482326877 1.718640e-02
## [1175,] 0.430813836 0.2910904300 6.556091e-02
## [1176,] 0.430813836 0.2910904300 6.556091e-02
## [1177,] 0.417093250 0.1331148669 1.416116e-02
## [1178,] 0.131453291 0.0066840657 1.132892e-04
## [1179,] 0.306334128 0.4241549461 1.957638e-01
## [1180,] 0.306334128 0.4241549461 1.957638e-01
## [1181,] 0.433331375 0.1637029640 2.061445e-02
## [1182,] 0.204487093 0.0179374643 5.244873e-04
## [1183,] 0.195398778 0.4422182874 3.336033e-01
## [1184,] 0.349346279 0.3975319727 1.507880e-01
## [1185,] 0.090631399 0.0030210466 3.356718e-05
## [1186,] 0.349346279 0.3975319727 1.507880e-01
## [1187,] 0.133114867 0.4170932496 4.356307e-01
## [1188,] 0.442218287 0.1953987782 2.877966e-02
## [1189,] 0.236850055 0.0253767916 9.063140e-04
## [1190,] 0.438655970 0.1794501695 2.447048e-02
## [1191,] 0.417093250 0.1331148669 1.416116e-02
## [1192,] 0.438655970 0.1794501695 2.447048e-02
## [1193,] 0.406028666 0.1184250277 1.151354e-02
## [1194,] 0.416337988 0.3211750193 8.258786e-02
## [1195,] 0.417093250 0.1331148669 1.416116e-02
## [1196,] 0.397531973 0.3493462791 1.023338e-01
## [1197,] 0.442218287 0.1953987782 2.877966e-02
## [1198,] 0.259696720 0.4403553087 2.488965e-01
## [1199,] 0.397531973 0.3493462791 1.023338e-01
## [1200,] 0.360146521 0.0776786613 5.584740e-03
## [1201,] 0.442218287 0.1953987782 2.877966e-02
## [1202,] 0.259696720 0.4403553087 2.488965e-01
## [1203,] 0.444358195 0.2275981001 3.885821e-02
## [1204,] 0.227598100 0.4443581954 2.891855e-01
## [1205,] 0.392899701 0.1042386963 9.218388e-03
## [1206,] 0.293645732 0.0435030714 2.148300e-03
## [1207,] 0.444093854 0.2114732637 3.356718e-02
## [1208,] 0.349346279 0.3975319727 1.507880e-01
## [1209,] 0.406028666 0.1184250277 1.151354e-02
## [1210,] 0.375000000 0.3750000000 1.250000e-01
## [1211,] 0.443086838 0.2436977611 4.467792e-02
## [1212,] 0.211473264 0.4440938538 3.108657e-01
## [1213,] 0.377630828 0.0906313987 7.250512e-03
## [1214,] 0.440355309 0.2596967205 5.105149e-02
## [1215,] 0.406028666 0.1184250277 1.151354e-02
## [1216,] 0.440355309 0.2596967205 5.105149e-02
## [1217,] 0.321175019 0.4163379880 1.798991e-01
## [1218,] 0.433331375 0.1637029640 2.061445e-02
## [1219,] 0.430813836 0.2910904300 6.556091e-02
## [1220,] 0.362525595 0.3866939680 1.374912e-01
## [1221,] 0.046838810 0.0007678494 4.195898e-06
## [1222,] 0.321175019 0.4163379880 1.798991e-01
## [1223,] 0.169380014 0.0116813803 2.685375e-04
## [1224,] 0.375000000 0.3750000000 1.250000e-01
## [1225,] 0.417093250 0.1331148669 1.416116e-02
## [1226,] 0.392899701 0.1042386963 9.218388e-03
## [1227,] 0.430813836 0.2910904300 6.556091e-02
## [1228,] 0.443086838 0.2436977611 4.467792e-02
## [1229,] 0.386693968 0.3625255950 1.132892e-01
## [1230,] 0.407438488 0.3355375785 9.210835e-02
## [1231,] 0.243697761 0.4430868383 2.685375e-01
## [1232,] 0.362525595 0.3866939680 1.374912e-01
## [1233,] 0.444093854 0.2114732637 3.356718e-02
## [1234,] 0.417093250 0.1331148669 1.416116e-02
## [1235,] 0.335537578 0.4074384881 1.649156e-01
## [1236,] 0.321175019 0.4163379880 1.798991e-01
## [1237,] 0.442218287 0.1953987782 2.877966e-02
## [1238,] 0.306334128 0.4241549461 1.957638e-01
## [1239,] 0.306334128 0.4241549461 1.957638e-01
## [1240,] 0.266544426 0.0339238361 1.439193e-03
## [1241,] 0.433331375 0.1637029640 2.061445e-02
## [1242,] 0.360146521 0.0776786613 5.584740e-03
## [1243,] 0.430813836 0.2910904300 6.556091e-02
## [1244,] 0.291090430 0.4308138364 2.125348e-01
## [1245,] 0.386693968 0.3625255950 1.132892e-01
## [1246,] 0.436239133 0.2755194522 5.800410e-02
## [1247,] 0.430813836 0.2910904300 6.556091e-02
## [1248,] 0.406028666 0.1184250277 1.151354e-02
## [1249,] 0.090631399 0.0030210466 3.356718e-05
## [1250,] 0.430813836 0.2910904300 6.556091e-02
## [1251,] 0.243697761 0.4430868383 2.685375e-01
## [1252,] 0.444093854 0.2114732637 3.356718e-02
## [1253,] 0.204487093 0.0179374643 5.244873e-04
## [1254,] 0.306334128 0.4241549461 1.957638e-01
## [1255,] 0.118425028 0.4060286664 4.640328e-01
## [1256,] 0.397531973 0.3493462791 1.023338e-01
## [1257,] 0.444358195 0.2275981001 3.885821e-02
## [1258,] 0.433331375 0.1637029640 2.061445e-02
## [1259,] 0.443086838 0.2436977611 4.467792e-02
## [1260,] 0.443086838 0.2436977611 4.467792e-02
## [1261,] 0.433331375 0.1637029640 2.061445e-02
## [1262,] 0.293645732 0.0435030714 2.148300e-03
## [1263,] 0.204487093 0.0179374643 5.244873e-04
## [1264,] 0.195398778 0.4422182874 3.336033e-01
## [1265,] 0.236850055 0.0253767916 9.063140e-04
## [1266,] 0.362525595 0.3866939680 1.374912e-01
## [1267,] 0.169380014 0.0116813803 2.685375e-04
## [1268,] 0.179450170 0.4386559699 3.574234e-01
## [1269,] 0.440355309 0.2596967205 5.105149e-02
## [1270,] 0.306334128 0.4241549461 1.957638e-01
## [1271,] 0.360146521 0.0776786613 5.584740e-03
## [1272,] 0.444358195 0.2275981001 3.885821e-02
## [1273,] 0.054038972 0.3182294988 6.246727e-01
## [1274,] 0.169380014 0.0116813803 2.685375e-04
## [1275,] 0.386693968 0.3625255950 1.132892e-01
## [1276,] 0.433331375 0.1637029640 2.061445e-02
## [1277,] 0.407438488 0.3355375785 9.210835e-02
## [1278,] 0.291090430 0.4308138364 2.125348e-01
## [1279,] 0.438655970 0.1794501695 2.447048e-02
## [1280,] 0.131453291 0.0066840657 1.132892e-04
## [1281,] 0.440355309 0.2596967205 5.105149e-02
## [1282,] 0.406028666 0.1184250277 1.151354e-02
## [1283,] 0.438655970 0.1794501695 2.447048e-02
## [1284,] 0.340371253 0.0654560102 4.195898e-03
## [1285,] 0.440355309 0.2596967205 5.105149e-02
## [1286,] 0.291090430 0.4308138364 2.125348e-01
## [1287,] 0.424154946 0.3063341278 7.374710e-02
## [1288,] 0.440355309 0.2596967205 5.105149e-02
## [1289,] 0.259696720 0.4403553087 2.488965e-01
## [1290,] 0.291090430 0.4308138364 2.125348e-01
## [1291,] 0.438655970 0.1794501695 2.447048e-02
## [1292,] 0.430813836 0.2910904300 6.556091e-02
## [1293,] 0.318229499 0.0540389715 3.058810e-03
## [1294,] 0.406028666 0.1184250277 1.151354e-02
## [1295,] 0.444093854 0.2114732637 3.356718e-02
## [1296,] 0.340371253 0.0654560102 4.195898e-03
## [1297,] 0.436239133 0.2755194522 5.800410e-02
## [1298,] 0.349346279 0.3975319727 1.507880e-01
## [1299,] 0.291090430 0.4308138364 2.125348e-01
## [1300,] 0.444358195 0.2275981001 3.885821e-02
## [1301,] 0.436239133 0.2755194522 5.800410e-02
## [1302,] 0.204487093 0.0179374643 5.244873e-04
## [1303,] 0.443086838 0.2436977611 4.467792e-02
## [1304,] 0.443086838 0.2436977611 4.467792e-02
## [1305,] 0.349346279 0.3975319727 1.507880e-01
## [1306,] 0.011681380 0.1693800141 8.186701e-01
## [1307,] 0.318229499 0.0540389715 3.058810e-03
## [1308,] 0.266544426 0.0339238361 1.439193e-03
## [1309,] 0.318229499 0.0540389715 3.058810e-03
## [1310,] 0.417093250 0.1331148669 1.416116e-02
## [1311,] 0.349346279 0.3975319727 1.507880e-01
## [1312,] 0.169380014 0.0116813803 2.685375e-04
## [1313,] 0.397531973 0.3493462791 1.023338e-01
## [1314,] 0.426168977 0.1482326877 1.718640e-02
## [1315,] 0.397531973 0.3493462791 1.023338e-01
## [1316,] 0.392899701 0.1042386963 9.218388e-03
## [1317,] 0.397531973 0.3493462791 1.023338e-01
## [1318,] 0.375000000 0.3750000000 1.250000e-01
## [1319,] 0.443086838 0.2436977611 4.467792e-02
## [1320,] 0.349346279 0.3975319727 1.507880e-01
## [1321,] 0.392899701 0.1042386963 9.218388e-03
## [1322,] 0.386693968 0.3625255950 1.132892e-01
## [1323,] 0.275519452 0.4362391326 2.302373e-01
## [1324,] 0.407438488 0.3355375785 9.210835e-02
## [1325,] 0.321175019 0.4163379880 1.798991e-01
## [1326,] 0.406028666 0.1184250277 1.151354e-02
## [1327,] 0.291090430 0.4308138364 2.125348e-01
## [1328,] 0.433331375 0.1637029640 2.061445e-02
## [1329,] 0.417093250 0.1331148669 1.416116e-02
## [1330,] 0.417093250 0.1331148669 1.416116e-02
## [1331,] 0.440355309 0.2596967205 5.105149e-02
## [1332,] 0.436239133 0.2755194522 5.800410e-02
## [1333,] 0.243697761 0.4430868383 2.685375e-01
## [1334,] 0.416337988 0.3211750193 8.258786e-02
## [1335,] 0.397531973 0.3493462791 1.023338e-01
## [1336,] 0.426168977 0.1482326877 1.718640e-02
## [1337,] 0.430813836 0.2910904300 6.556091e-02
## [1338,] 0.243697761 0.4430868383 2.685375e-01
## [1339,] 0.424154946 0.3063341278 7.374710e-02
## [1340,] 0.438655970 0.1794501695 2.447048e-02
## [1341,] 0.397531973 0.3493462791 1.023338e-01
## [1342,] 0.275519452 0.4362391326 2.302373e-01
## [1343,] 0.444093854 0.2114732637 3.356718e-02
## [1344,] 0.424154946 0.3063341278 7.374710e-02
## [1345,] 0.275519452 0.4362391326 2.302373e-01
## [1346,] 0.349346279 0.3975319727 1.507880e-01
## [1347,] 0.440355309 0.2596967205 5.105149e-02
## [1348,] 0.335537578 0.4074384881 1.649156e-01
## [1349,] 0.318229499 0.0540389715 3.058810e-03
## [1350,] 0.335537578 0.4074384881 1.649156e-01
## [1351,] 0.349346279 0.3975319727 1.507880e-01
## [1352,] 0.349346279 0.3975319727 1.507880e-01
## [1353,] 0.340371253 0.0654560102 4.195898e-03
## [1354,] 0.375000000 0.3750000000 1.250000e-01
## [1355,] 0.195398778 0.4422182874 3.336033e-01
## [1356,] 0.204487093 0.0179374643 5.244873e-04
## [1357,] 0.321175019 0.4163379880 1.798991e-01
## [1358,] 0.291090430 0.4308138364 2.125348e-01
## [1359,] 0.386693968 0.3625255950 1.132892e-01
## [1360,] 0.362525595 0.3866939680 1.374912e-01
## [1361,] 0.375000000 0.3750000000 1.250000e-01
## [1362,] 0.375000000 0.3750000000 1.250000e-01
## [1363,] 0.430813836 0.2910904300 6.556091e-02
## [1364,] 0.407438488 0.3355375785 9.210835e-02
## [1365,] 0.386693968 0.3625255950 1.132892e-01
## [1366,] 0.046838810 0.0007678494 4.195898e-06
## [1367,] 0.275519452 0.4362391326 2.302373e-01
## [1368,] 0.424154946 0.3063341278 7.374710e-02
## [1369,] 0.436239133 0.2755194522 5.800410e-02
## [1370,] 0.406028666 0.1184250277 1.151354e-02
## [1371,] 0.406028666 0.1184250277 1.151354e-02
## [1372,] 0.430813836 0.2910904300 6.556091e-02
## [1373,] 0.259696720 0.4403553087 2.488965e-01
## [1374,] 0.104238696 0.3928997013 4.936432e-01
## [1375,] 0.392899701 0.1042386963 9.218388e-03
## [1376,] 0.375000000 0.3750000000 1.250000e-01
## [1377,] 0.440355309 0.2596967205 5.105149e-02
## [1378,] 0.433331375 0.1637029640 2.061445e-02
## [1379,] 0.417093250 0.1331148669 1.416116e-02
## [1380,] 0.321175019 0.4163379880 1.798991e-01
## [1381,] 0.430813836 0.2910904300 6.556091e-02
## [1382,] 0.438655970 0.1794501695 2.447048e-02
## [1383,] 0.444093854 0.2114732637 3.356718e-02
## [1384,] 0.243697761 0.4430868383 2.685375e-01
## [1385,] 0.416337988 0.3211750193 8.258786e-02
## [1386,] 0.426168977 0.1482326877 1.718640e-02
## [1387,] 0.131453291 0.0066840657 1.132892e-04
## [1388,] 0.444358195 0.2275981001 3.885821e-02
## [1389,] 0.340371253 0.0654560102 4.195898e-03
## [1390,] 0.306334128 0.4241549461 1.957638e-01
## [1391,] 0.236850055 0.0253767916 9.063140e-04
## [1392,] 0.392899701 0.1042386963 9.218388e-03
## [1393,] 0.424154946 0.3063341278 7.374710e-02
## [1394,] 0.377630828 0.0906313987 7.250512e-03
## [1395,] 0.440355309 0.2596967205 5.105149e-02
## [1396,] 0.293645732 0.0435030714 2.148300e-03
## [1397,] 0.406028666 0.1184250277 1.151354e-02
## [1398,] 0.436239133 0.2755194522 5.800410e-02
## [1399,] 0.424154946 0.3063341278 7.374710e-02
## [1400,] 0.377630828 0.0906313987 7.250512e-03
## [1401,] 0.243697761 0.4430868383 2.685375e-01
## [1402,] 0.417093250 0.1331148669 1.416116e-02
## [1403,] 0.340371253 0.0654560102 4.195898e-03
## [1404,] 0.430813836 0.2910904300 6.556091e-02
## [1405,] 0.375000000 0.3750000000 1.250000e-01
## [1406,] 0.438655970 0.1794501695 2.447048e-02
## [1407,] 0.397531973 0.3493462791 1.023338e-01
## [1408,] 0.426168977 0.1482326877 1.718640e-02
## [1409,] 0.179450170 0.4386559699 3.574234e-01
## [1410,] 0.424154946 0.3063341278 7.374710e-02
## [1411,] 0.386693968 0.3625255950 1.132892e-01
## [1412,] 0.275519452 0.4362391326 2.302373e-01
## [1413,] 0.362525595 0.3866939680 1.374912e-01
## [1414,] 0.377630828 0.0906313987 7.250512e-03
## [1415,] 0.426168977 0.1482326877 1.718640e-02
## [1416,] 0.349346279 0.3975319727 1.507880e-01
## [1417,] 0.321175019 0.4163379880 1.798991e-01
## [1418,] 0.443086838 0.2436977611 4.467792e-02
## [1419,] 0.426168977 0.1482326877 1.718640e-02
## [1420,] 0.438655970 0.1794501695 2.447048e-02
## [1421,] 0.306334128 0.4241549461 1.957638e-01
## [1422,] 0.179450170 0.4386559699 3.574234e-01
## [1423,] 0.417093250 0.1331148669 1.416116e-02
## [1424,] 0.424154946 0.3063341278 7.374710e-02
## [1425,] 0.000000000 0.0000000000 1.000000e+00
## [1426,] 0.349346279 0.3975319727 1.507880e-01
## [1427,] 0.211473264 0.4440938538 3.108657e-01
## [1428,] 0.417093250 0.1331148669 1.416116e-02
## [1429,] 0.340371253 0.0654560102 4.195898e-03
## [1430,] 0.275519452 0.4362391326 2.302373e-01
## [1431,] 0.275519452 0.4362391326 2.302373e-01
## [1432,] 0.426168977 0.1482326877 1.718640e-02
## [1433,] 0.416337988 0.3211750193 8.258786e-02
## [1434,] 0.275519452 0.4362391326 2.302373e-01
## [1435,] 0.340371253 0.0654560102 4.195898e-03
## [1436,] 0.442218287 0.1953987782 2.877966e-02
## [1437,] 0.275519452 0.4362391326 2.302373e-01
## [1438,] 0.169380014 0.0116813803 2.685375e-04
## [1439,] 0.211473264 0.4440938538 3.108657e-01
## [1440,] 0.377630828 0.0906313987 7.250512e-03
## [1441,] 0.362525595 0.3866939680 1.374912e-01
## [1442,] 0.444093854 0.2114732637 3.356718e-02
## [1443,] 0.291090430 0.4308138364 2.125348e-01
## [1444,] 0.444358195 0.2275981001 3.885821e-02
## [1445,] 0.436239133 0.2755194522 5.800410e-02
## [1446,] 0.054038972 0.3182294988 6.246727e-01
## [1447,] 0.375000000 0.3750000000 1.250000e-01
## [1448,] 0.416337988 0.3211750193 8.258786e-02
## [1449,] 0.440355309 0.2596967205 5.105149e-02
## [1450,] 0.417093250 0.1331148669 1.416116e-02
## [1451,] 0.397531973 0.3493462791 1.023338e-01
## [1452,] 0.204487093 0.0179374643 5.244873e-04
## [1453,] 0.406028666 0.1184250277 1.151354e-02
## [1454,] 0.377630828 0.0906313987 7.250512e-03
## [1455,] 0.306334128 0.4241549461 1.957638e-01
## [1456,] 0.335537578 0.4074384881 1.649156e-01
## [1457,] 0.377630828 0.0906313987 7.250512e-03
## [1458,] 0.406028666 0.1184250277 1.151354e-02
## [1459,] 0.321175019 0.4163379880 1.798991e-01
## [1460,] 0.392899701 0.1042386963 9.218388e-03
## [1461,] 0.362525595 0.3866939680 1.374912e-01
## [1462,] 0.440355309 0.2596967205 5.105149e-02
## [1463,] 0.397531973 0.3493462791 1.023338e-01
## [1464,] 0.442218287 0.1953987782 2.877966e-02
## [1465,] 0.236850055 0.0253767916 9.063140e-04
## [1466,] 0.321175019 0.4163379880 1.798991e-01
## [1467,] 0.444358195 0.2275981001 3.885821e-02
## [1468,] 0.397531973 0.3493462791 1.023338e-01
## [1469,] 0.438655970 0.1794501695 2.447048e-02
## [1470,] 0.211473264 0.4440938538 3.108657e-01
## [1471,] 0.430813836 0.2910904300 6.556091e-02
## [1472,] 0.090631399 0.0030210466 3.356718e-05
## [1473,] 0.318229499 0.0540389715 3.058810e-03
## [1474,] 0.362525595 0.3866939680 1.374912e-01
## [1475,] 0.275519452 0.4362391326 2.302373e-01
## [1476,] 0.046838810 0.0007678494 4.195898e-06
## [1477,] 0.433331375 0.1637029640 2.061445e-02
## [1478,] 0.416337988 0.3211750193 8.258786e-02
## [1479,] 0.306334128 0.4241549461 1.957638e-01
## [1480,] 0.436239133 0.2755194522 5.800410e-02
## [1481,] 0.349346279 0.3975319727 1.507880e-01
## [1482,] 0.386693968 0.3625255950 1.132892e-01
## [1483,] 0.362525595 0.3866939680 1.374912e-01
## [1484,] 0.442218287 0.1953987782 2.877966e-02
## [1485,] 0.444093854 0.2114732637 3.356718e-02
## [1486,] 0.440355309 0.2596967205 5.105149e-02
## [1487,] 0.349346279 0.3975319727 1.507880e-01
## [1488,] 0.349346279 0.3975319727 1.507880e-01
## [1489,] 0.430813836 0.2910904300 6.556091e-02
## [1490,] 0.426168977 0.1482326877 1.718640e-02
## [1491,] 0.430813836 0.2910904300 6.556091e-02
## [1492,] 0.227598100 0.4443581954 2.891855e-01
## [1493,] 0.195398778 0.4422182874 3.336033e-01
## [1494,] 0.375000000 0.3750000000 1.250000e-01
## [1495,] 0.306334128 0.4241549461 1.957638e-01
## [1496,] 0.440355309 0.2596967205 5.105149e-02
## [1497,] 0.360146521 0.0776786613 5.584740e-03
## [1498,] 0.118425028 0.4060286664 4.640328e-01
## [1499,] 0.426168977 0.1482326877 1.718640e-02
## [1500,] 0.440355309 0.2596967205 5.105149e-02
## [1501,] 0.293645732 0.0435030714 2.148300e-03
## [1502,] 0.306334128 0.4241549461 1.957638e-01
## [1503,] 0.424154946 0.3063341278 7.374710e-02
## [1504,] 0.321175019 0.4163379880 1.798991e-01
## [1505,] 0.306334128 0.4241549461 1.957638e-01
## [1506,] 0.179450170 0.4386559699 3.574234e-01
## [1507,] 0.443086838 0.2436977611 4.467792e-02
## [1508,] 0.444358195 0.2275981001 3.885821e-02
## [1509,] 0.291090430 0.4308138364 2.125348e-01
## [1510,] 0.259696720 0.4403553087 2.488965e-01
## [1511,] 0.416337988 0.3211750193 8.258786e-02
## [1512,] 0.340371253 0.0654560102 4.195898e-03
## [1513,] 0.243697761 0.4430868383 2.685375e-01
## [1514,] 0.335537578 0.4074384881 1.649156e-01
## [1515,] 0.392899701 0.1042386963 9.218388e-03
## [1516,] 0.163702964 0.4333313752 3.823512e-01
## [1517,] 0.436239133 0.2755194522 5.800410e-02
## [1518,] 0.377630828 0.0906313987 7.250512e-03
## [1519,] 0.335537578 0.4074384881 1.649156e-01
## [1520,] 0.436239133 0.2755194522 5.800410e-02
## [1521,] 0.259696720 0.4403553087 2.488965e-01
## [1522,] 0.407438488 0.3355375785 9.210835e-02
## [1523,] 0.131453291 0.0066840657 1.132892e-04
## [1524,] 0.426168977 0.1482326877 1.718640e-02
## [1525,] 0.444358195 0.2275981001 3.885821e-02
## [1526,] 0.436239133 0.2755194522 5.800410e-02
## [1527,] 0.000000000 0.0000000000 1.000000e+00
## [1528,] 0.392899701 0.1042386963 9.218388e-03
## [1529,] 0.440355309 0.2596967205 5.105149e-02
## [1530,] 0.442218287 0.1953987782 2.877966e-02
## [1531,] 0.430813836 0.2910904300 6.556091e-02
## [1532,] 0.306334128 0.4241549461 1.957638e-01
## [1533,] 0.416337988 0.3211750193 8.258786e-02
## [1534,] 0.227598100 0.4443581954 2.891855e-01
## [1535,] 0.360146521 0.0776786613 5.584740e-03
## [1536,] 0.360146521 0.0776786613 5.584740e-03
## [1537,] 0.416337988 0.3211750193 8.258786e-02
## [1538,] 0.163702964 0.4333313752 3.823512e-01
## [1539,] 0.275519452 0.4362391326 2.302373e-01
## [1540,] 0.444358195 0.2275981001 3.885821e-02
## [1541,] 0.436239133 0.2755194522 5.800410e-02
## [1542,] 0.397531973 0.3493462791 1.023338e-01
## [1543,] 0.430813836 0.2910904300 6.556091e-02
## [1544,] 0.436239133 0.2755194522 5.800410e-02
## [1545,] 0.362525595 0.3866939680 1.374912e-01
## [1546,] 0.444358195 0.2275981001 3.885821e-02
## [1547,] 0.362525595 0.3866939680 1.374912e-01
## [1548,] 0.211473264 0.4440938538 3.108657e-01
## [1549,] 0.259696720 0.4403553087 2.488965e-01
## [1550,] 0.375000000 0.3750000000 1.250000e-01
## [1551,] 0.417093250 0.1331148669 1.416116e-02
## [1552,] 0.227598100 0.4443581954 2.891855e-01
## [1553,] 0.440355309 0.2596967205 5.105149e-02
## [1554,] 0.417093250 0.1331148669 1.416116e-02
## [1555,] 0.340371253 0.0654560102 4.195898e-03
## [1556,] 0.375000000 0.3750000000 1.250000e-01
## [1557,] 0.349346279 0.3975319727 1.507880e-01
## [1558,] 0.169380014 0.0116813803 2.685375e-04
## [1559,] 0.397531973 0.3493462791 1.023338e-01
## [1560,] 0.227598100 0.4443581954 2.891855e-01
## [1561,] 0.440355309 0.2596967205 5.105149e-02
## [1562,] 0.406028666 0.1184250277 1.151354e-02
## [1563,] 0.444358195 0.2275981001 3.885821e-02
## [1564,] 0.148232688 0.4261689772 4.084119e-01
## [1565,] 0.438655970 0.1794501695 2.447048e-02
## [1566,] 0.195398778 0.4422182874 3.336033e-01
## [1567,] 0.426168977 0.1482326877 1.718640e-02
## [1568,] 0.335537578 0.4074384881 1.649156e-01
## [1569,] 0.417093250 0.1331148669 1.416116e-02
## [1570,] 0.426168977 0.1482326877 1.718640e-02
## [1571,] 0.444358195 0.2275981001 3.885821e-02
## [1572,] 0.227598100 0.4443581954 2.891855e-01
## [1573,] 0.375000000 0.3750000000 1.250000e-01
## [1574,] 0.443086838 0.2436977611 4.467792e-02
## [1575,] 0.375000000 0.3750000000 1.250000e-01
## [1576,] 0.227598100 0.4443581954 2.891855e-01
## [1577,] 0.444358195 0.2275981001 3.885821e-02
## [1578,] 0.163702964 0.4333313752 3.823512e-01
## [1579,] 0.266544426 0.0339238361 1.439193e-03
## [1580,] 0.321175019 0.4163379880 1.798991e-01
## [1581,] 0.204487093 0.0179374643 5.244873e-04
## [1582,] 0.438655970 0.1794501695 2.447048e-02
## [1583,] 0.046838810 0.0007678494 4.195898e-06
## [1584,] 0.430813836 0.2910904300 6.556091e-02
## [1585,] 0.443086838 0.2436977611 4.467792e-02
## [1586,] 0.444093854 0.2114732637 3.356718e-02
## [1587,] 0.163702964 0.4333313752 3.823512e-01
## [1588,] 0.416337988 0.3211750193 8.258786e-02
## [1589,] 0.406028666 0.1184250277 1.151354e-02
## [1590,] 0.442218287 0.1953987782 2.877966e-02
## [1591,] 0.442218287 0.1953987782 2.877966e-02
## [1592,] 0.416337988 0.3211750193 8.258786e-02
## [1593,] 0.424154946 0.3063341278 7.374710e-02
## [1594,] 0.444358195 0.2275981001 3.885821e-02
## [1595,] 0.417093250 0.1331148669 1.416116e-02
## [1596,] 0.433331375 0.1637029640 2.061445e-02
## [1597,] 0.163702964 0.4333313752 3.823512e-01
## [1598,] 0.416337988 0.3211750193 8.258786e-02
## [1599,] 0.440355309 0.2596967205 5.105149e-02
## [1600,] 0.416337988 0.3211750193 8.258786e-02
## [1601,] 0.433331375 0.1637029640 2.061445e-02
## [1602,] 0.335537578 0.4074384881 1.649156e-01
## [1603,] 0.443086838 0.2436977611 4.467792e-02
## [1604,] 0.440355309 0.2596967205 5.105149e-02
## [1605,] 0.386693968 0.3625255950 1.132892e-01
## [1606,] 0.291090430 0.4308138364 2.125348e-01
## [1607,] 0.148232688 0.4261689772 4.084119e-01
## [1608,] 0.360146521 0.0776786613 5.584740e-03
## [1609,] 0.440355309 0.2596967205 5.105149e-02
## [1610,] 0.243697761 0.4430868383 2.685375e-01
## [1611,] 0.426168977 0.1482326877 1.718640e-02
## [1612,] 0.430813836 0.2910904300 6.556091e-02
## [1613,] 0.407438488 0.3355375785 9.210835e-02
## [1614,] 0.397531973 0.3493462791 1.023338e-01
## [1615,] 0.416337988 0.3211750193 8.258786e-02
## [1616,] 0.426168977 0.1482326877 1.718640e-02
## [1617,] 0.406028666 0.1184250277 1.151354e-02
## [1618,] 0.291090430 0.4308138364 2.125348e-01
## [1619,] 0.169380014 0.0116813803 2.685375e-04
## [1620,] 0.426168977 0.1482326877 1.718640e-02
## [1621,] 0.386693968 0.3625255950 1.132892e-01
## [1622,] 0.375000000 0.3750000000 1.250000e-01
## [1623,] 0.397531973 0.3493462791 1.023338e-01
## [1624,] 0.433331375 0.1637029640 2.061445e-02
## [1625,] 0.362525595 0.3866939680 1.374912e-01
## [1626,] 0.291090430 0.4308138364 2.125348e-01
## [1627,] 0.416337988 0.3211750193 8.258786e-02
## [1628,] 0.443086838 0.2436977611 4.467792e-02
## [1629,] 0.397531973 0.3493462791 1.023338e-01
## [1630,] 0.436239133 0.2755194522 5.800410e-02
## [1631,] 0.386693968 0.3625255950 1.132892e-01
## [1632,] 0.375000000 0.3750000000 1.250000e-01
## [1633,] 0.349346279 0.3975319727 1.507880e-01
## [1634,] 0.243697761 0.4430868383 2.685375e-01
## [1635,] 0.406028666 0.1184250277 1.151354e-02
## [1636,] 0.291090430 0.4308138364 2.125348e-01
## [1637,] 0.266544426 0.0339238361 1.439193e-03
## [1638,] 0.033923836 0.2665444262 6.980925e-01
## [1639,] 0.000000000 0.0000000000 0.000000e+00
## [1640,] 0.335537578 0.4074384881 1.649156e-01
## [1641,] 0.349346279 0.3975319727 1.507880e-01
## [1642,] 0.424154946 0.3063341278 7.374710e-02
## [1643,] 0.360146521 0.0776786613 5.584740e-03
## [1644,] 0.386693968 0.3625255950 1.132892e-01
## [1645,] 0.179450170 0.4386559699 3.574234e-01
## [1646,] 0.236850055 0.0253767916 9.063140e-04
## [1647,] 0.386693968 0.3625255950 1.132892e-01
## [1648,] 0.306334128 0.4241549461 1.957638e-01
## [1649,] 0.386693968 0.3625255950 1.132892e-01
## [1650,] 0.033923836 0.2665444262 6.980925e-01
## [1651,] 0.377630828 0.0906313987 7.250512e-03
## [1652,] 0.386693968 0.3625255950 1.132892e-01
## [1653,] 0.360146521 0.0776786613 5.584740e-03
## [1654,] 0.443086838 0.2436977611 4.467792e-02
## [1655,] 0.335537578 0.4074384881 1.649156e-01
## [1656,] 0.407438488 0.3355375785 9.210835e-02
## [1657,] 0.424154946 0.3063341278 7.374710e-02
## [1658,] 0.443086838 0.2436977611 4.467792e-02
## [1659,] 0.392899701 0.1042386963 9.218388e-03
## [1660,] 0.046838810 0.0007678494 4.195898e-06
## [1661,] 0.430813836 0.2910904300 6.556091e-02
## [1662,] 0.275519452 0.4362391326 2.302373e-01
## [1663,] 0.291090430 0.4308138364 2.125348e-01
## [1664,] 0.436239133 0.2755194522 5.800410e-02
## [1665,] 0.318229499 0.0540389715 3.058810e-03
## [1666,] 0.426168977 0.1482326877 1.718640e-02
## [1667,] 0.397531973 0.3493462791 1.023338e-01
## [1668,] 0.417093250 0.1331148669 1.416116e-02
## [1669,] 0.433331375 0.1637029640 2.061445e-02
## [1670,] 0.443086838 0.2436977611 4.467792e-02
## [1671,] 0.397531973 0.3493462791 1.023338e-01
## [1672,] 0.416337988 0.3211750193 8.258786e-02
## [1673,] 0.306334128 0.4241549461 1.957638e-01
## [1674,] 0.440355309 0.2596967205 5.105149e-02
## [1675,] 0.407438488 0.3355375785 9.210835e-02
## [1676,] 0.424154946 0.3063341278 7.374710e-02
## [1677,] 0.424154946 0.3063341278 7.374710e-02
## [1678,] 0.407438488 0.3355375785 9.210835e-02
## [1679,] 0.444093854 0.2114732637 3.356718e-02
## [1680,] 0.417093250 0.1331148669 1.416116e-02
## [1681,] 0.335537578 0.4074384881 1.649156e-01
## [1682,] 0.417093250 0.1331148669 1.416116e-02
## [1683,] 0.406028666 0.1184250277 1.151354e-02
## [1684,] 0.444358195 0.2275981001 3.885821e-02
## [1685,] 0.438655970 0.1794501695 2.447048e-02
## [1686,] 0.442218287 0.1953987782 2.877966e-02
## [1687,] 0.443086838 0.2436977611 4.467792e-02
## [1688,] 0.275519452 0.4362391326 2.302373e-01
## [1689,] 0.375000000 0.3750000000 1.250000e-01
## [1690,] 0.406028666 0.1184250277 1.151354e-02
## [1691,] 0.386693968 0.3625255950 1.132892e-01
## [1692,] 0.386693968 0.3625255950 1.132892e-01
## [1693,] 0.406028666 0.1184250277 1.151354e-02
## [1694,] 0.377630828 0.0906313987 7.250512e-03
## [1695,] 0.417093250 0.1331148669 1.416116e-02
## [1696,] 0.275519452 0.4362391326 2.302373e-01
## [1697,] 0.407438488 0.3355375785 9.210835e-02
## [1698,] 0.375000000 0.3750000000 1.250000e-01
## [1699,] 0.442218287 0.1953987782 2.877966e-02
## [1700,] 0.321175019 0.4163379880 1.798991e-01
## [1701,] 0.275519452 0.4362391326 2.302373e-01
## [1702,] 0.275519452 0.4362391326 2.302373e-01
## [1703,] 0.386693968 0.3625255950 1.132892e-01
## [1704,] 0.397531973 0.3493462791 1.023338e-01
## [1705,] 0.335537578 0.4074384881 1.649156e-01
## [1706,] 0.443086838 0.2436977611 4.467792e-02
## [1707,] 0.433331375 0.1637029640 2.061445e-02
## [1708,] 0.443086838 0.2436977611 4.467792e-02
## [1709,] 0.169380014 0.0116813803 2.685375e-04
## [1710,] 0.386693968 0.3625255950 1.132892e-01
## [1711,] 0.443086838 0.2436977611 4.467792e-02
## [1712,] 0.416337988 0.3211750193 8.258786e-02
## [1713,] 0.377630828 0.0906313987 7.250512e-03
## [1714,] 0.407438488 0.3355375785 9.210835e-02
## [1715,] 0.406028666 0.1184250277 1.151354e-02
## [1716,] 0.321175019 0.4163379880 1.798991e-01
## [1717,] 0.406028666 0.1184250277 1.151354e-02
## [1718,] 0.444358195 0.2275981001 3.885821e-02
## [1719,] 0.349346279 0.3975319727 1.507880e-01
## [1720,] 0.443086838 0.2436977611 4.467792e-02
## [1721,] 0.118425028 0.4060286664 4.640328e-01
## [1722,] 0.443086838 0.2436977611 4.467792e-02
## [1723,] 0.335537578 0.4074384881 1.649156e-01
## [1724,] 0.406028666 0.1184250277 1.151354e-02
## [1725,] 0.416337988 0.3211750193 8.258786e-02
## [1726,] 0.442218287 0.1953987782 2.877966e-02
## [1727,] 0.375000000 0.3750000000 1.250000e-01
## [1728,] 0.321175019 0.4163379880 1.798991e-01
## [1729,] 0.118425028 0.4060286664 4.640328e-01
## [1730,] 0.440355309 0.2596967205 5.105149e-02
## [1731,] 0.306334128 0.4241549461 1.957638e-01
## [1732,] 0.236850055 0.0253767916 9.063140e-04
## [1733,] 0.179450170 0.4386559699 3.574234e-01
## [1734,] 0.163702964 0.4333313752 3.823512e-01
## [1735,] 0.293645732 0.0435030714 2.148300e-03
## [1736,] 0.416337988 0.3211750193 8.258786e-02
## [1737,] 0.204487093 0.0179374643 5.244873e-04
## [1738,] 0.392899701 0.1042386963 9.218388e-03
## [1739,] 0.430813836 0.2910904300 6.556091e-02
## [1740,] 0.386693968 0.3625255950 1.132892e-01
## [1741,] 0.291090430 0.4308138364 2.125348e-01
## [1742,] 0.386693968 0.3625255950 1.132892e-01
## [1743,] 0.163702964 0.4333313752 3.823512e-01
## [1744,] 0.259696720 0.4403553087 2.488965e-01
## [1745,] 0.077678661 0.3601465208 5.565901e-01
## [1746,] 0.392899701 0.1042386963 9.218388e-03
## [1747,] 0.444093854 0.2114732637 3.356718e-02
## [1748,] 0.424154946 0.3063341278 7.374710e-02
## [1749,] 0.392899701 0.1042386963 9.218388e-03
## [1750,] 0.375000000 0.3750000000 1.250000e-01
## [1751,] 0.293645732 0.0435030714 2.148300e-03
## [1752,] 0.377630828 0.0906313987 7.250512e-03
## [1753,] 0.443086838 0.2436977611 4.467792e-02
## [1754,] 0.424154946 0.3063341278 7.374710e-02
## [1755,] 0.133114867 0.4170932496 4.356307e-01
## [1756,] 0.306334128 0.4241549461 1.957638e-01
## [1757,] 0.275519452 0.4362391326 2.302373e-01
## [1758,] 0.442218287 0.1953987782 2.877966e-02
## [1759,] 0.407438488 0.3355375785 9.210835e-02
## [1760,] 0.442218287 0.1953987782 2.877966e-02
## [1761,] 0.243697761 0.4430868383 2.685375e-01
## [1762,] 0.349346279 0.3975319727 1.507880e-01
## [1763,] 0.436239133 0.2755194522 5.800410e-02
## [1764,] 0.407438488 0.3355375785 9.210835e-02
## [1765,] 0.430813836 0.2910904300 6.556091e-02
## [1766,] 0.397531973 0.3493462791 1.023338e-01
## [1767,] 0.424154946 0.3063341278 7.374710e-02
## [1768,] 0.438655970 0.1794501695 2.447048e-02
## [1769,] 0.360146521 0.0776786613 5.584740e-03
## [1770,] 0.090631399 0.0030210466 3.356718e-05
## [1771,] 0.406028666 0.1184250277 1.151354e-02
## [1772,] 0.438655970 0.1794501695 2.447048e-02
## [1773,] 0.392899701 0.1042386963 9.218388e-03
## [1774,] 0.340371253 0.0654560102 4.195898e-03
## [1775,] 0.436239133 0.2755194522 5.800410e-02
## [1776,] 0.148232688 0.4261689772 4.084119e-01
## [1777,] 0.442218287 0.1953987782 2.877966e-02
## [1778,] 0.377630828 0.0906313987 7.250512e-03
## [1779,] 0.293645732 0.0435030714 2.148300e-03
## [1780,] 0.424154946 0.3063341278 7.374710e-02
## [1781,] 0.386693968 0.3625255950 1.132892e-01
## [1782,] 0.321175019 0.4163379880 1.798991e-01
## [1783,] 0.436239133 0.2755194522 5.800410e-02
## [1784,] 0.266544426 0.0339238361 1.439193e-03
## [1785,] 0.335537578 0.4074384881 1.649156e-01
## [1786,] 0.444093854 0.2114732637 3.356718e-02
## [1787,] 0.360146521 0.0776786613 5.584740e-03
## [1788,] 0.259696720 0.4403553087 2.488965e-01
## [1789,] 0.362525595 0.3866939680 1.374912e-01
## [1790,] 0.204487093 0.0179374643 5.244873e-04
## [1791,] 0.195398778 0.4422182874 3.336033e-01
## [1792,] 0.065456010 0.3403712531 5.899768e-01
## [1793,] 0.227598100 0.4443581954 2.891855e-01
## [1794,] 0.266544426 0.0339238361 1.439193e-03
## [1795,] 0.386693968 0.3625255950 1.132892e-01
## [1796,] 0.335537578 0.4074384881 1.649156e-01
## [1797,] 0.424154946 0.3063341278 7.374710e-02
## [1798,] 0.430813836 0.2910904300 6.556091e-02
## [1799,] 0.349346279 0.3975319727 1.507880e-01
## [1800,] 0.430813836 0.2910904300 6.556091e-02
## [1801,] 0.340371253 0.0654560102 4.195898e-03
## [1802,] 0.306334128 0.4241549461 1.957638e-01
## [1803,] 0.438655970 0.1794501695 2.447048e-02
## [1804,] 0.054038972 0.3182294988 6.246727e-01
## [1805,] 0.204487093 0.0179374643 5.244873e-04
## [1806,] 0.436239133 0.2755194522 5.800410e-02
## [1807,] 0.318229499 0.0540389715 3.058810e-03
## [1808,] 0.360146521 0.0776786613 5.584740e-03
## [1809,] 0.440355309 0.2596967205 5.105149e-02
## [1810,] 0.169380014 0.0116813803 2.685375e-04
## [1811,] 0.444358195 0.2275981001 3.885821e-02
## [1812,] 0.375000000 0.3750000000 1.250000e-01
## [1813,] 0.436239133 0.2755194522 5.800410e-02
## [1814,] 0.291090430 0.4308138364 2.125348e-01
## [1815,] 0.397531973 0.3493462791 1.023338e-01
## [1816,] 0.377630828 0.0906313987 7.250512e-03
## [1817,] 0.275519452 0.4362391326 2.302373e-01
## [1818,] 0.430813836 0.2910904300 6.556091e-02
## [1819,] 0.433331375 0.1637029640 2.061445e-02
## [1820,] 0.243697761 0.4430868383 2.685375e-01
## [1821,] 0.077678661 0.3601465208 5.565901e-01
## [1822,] 0.090631399 0.3776308281 5.244873e-01
## [1823,] 0.335537578 0.4074384881 1.649156e-01
## [1824,] 0.118425028 0.4060286664 4.640328e-01
## [1825,] 0.377630828 0.0906313987 7.250512e-03
## [1826,] 0.430813836 0.2910904300 6.556091e-02
## [1827,] 0.306334128 0.4241549461 1.957638e-01
## [1828,] 0.442218287 0.1953987782 2.877966e-02
## [1829,] 0.407438488 0.3355375785 9.210835e-02
## [1830,] 0.321175019 0.4163379880 1.798991e-01
## [1831,] 0.392899701 0.1042386963 9.218388e-03
## [1832,] 0.000000000 0.0000000000 0.000000e+00
## [1833,] 0.375000000 0.3750000000 1.250000e-01
## [1834,] 0.443086838 0.2436977611 4.467792e-02
## [1835,] 0.433331375 0.1637029640 2.061445e-02
## [1836,] 0.407438488 0.3355375785 9.210835e-02
## [1837,] 0.443086838 0.2436977611 4.467792e-02
## [1838,] 0.444358195 0.2275981001 3.885821e-02
## [1839,] 0.436239133 0.2755194522 5.800410e-02
## [1840,] 0.442218287 0.1953987782 2.877966e-02
## [1841,] 0.243697761 0.4430868383 2.685375e-01
## [1842,] 0.443086838 0.2436977611 4.467792e-02
## [1843,] 0.318229499 0.0540389715 3.058810e-03
## [1844,] 0.392899701 0.1042386963 9.218388e-03
## [1845,] 0.424154946 0.3063341278 7.374710e-02
## [1846,] 0.444093854 0.2114732637 3.356718e-02
## [1847,] 0.426168977 0.1482326877 1.718640e-02
## [1848,] 0.440355309 0.2596967205 5.105149e-02
## [1849,] 0.090631399 0.0030210466 3.356718e-05
## [1850,] 0.444093854 0.2114732637 3.356718e-02
## [1851,] 0.430813836 0.2910904300 6.556091e-02
## [1852,] 0.362525595 0.3866939680 1.374912e-01
## [1853,] 0.291090430 0.4308138364 2.125348e-01
## [1854,] 0.236850055 0.0253767916 9.063140e-04
## [1855,] 0.440355309 0.2596967205 5.105149e-02
## [1856,] 0.442218287 0.1953987782 2.877966e-02
## [1857,] 0.436239133 0.2755194522 5.800410e-02
## [1858,] 0.266544426 0.0339238361 1.439193e-03
## [1859,] 0.416337988 0.3211750193 8.258786e-02
## [1860,] 0.443086838 0.2436977611 4.467792e-02
## [1861,] 0.430813836 0.2910904300 6.556091e-02
## [1862,] 0.362525595 0.3866939680 1.374912e-01
## [1863,] 0.436239133 0.2755194522 5.800410e-02
## [1864,] 0.046838810 0.0007678494 4.195898e-06
## [1865,] 0.424154946 0.3063341278 7.374710e-02
## [1866,] 0.293645732 0.0435030714 2.148300e-03
## [1867,] 0.306334128 0.4241549461 1.957638e-01
## [1868,] 0.406028666 0.1184250277 1.151354e-02
## [1869,] 0.375000000 0.3750000000 1.250000e-01
## [1870,] 0.433331375 0.1637029640 2.061445e-02
## [1871,] 0.426168977 0.1482326877 1.718640e-02
## [1872,] 0.204487093 0.0179374643 5.244873e-04
## [1873,] 0.211473264 0.4440938538 3.108657e-01
## [1874,] 0.397531973 0.3493462791 1.023338e-01
## [1875,] 0.386693968 0.3625255950 1.132892e-01
## [1876,] 0.433331375 0.1637029640 2.061445e-02
## [1877,] 0.291090430 0.4308138364 2.125348e-01
## [1878,] 0.433331375 0.1637029640 2.061445e-02
## [1879,] 0.442218287 0.1953987782 2.877966e-02
## [1880,] 0.318229499 0.0540389715 3.058810e-03
## [1881,] 0.148232688 0.4261689772 4.084119e-01
## [1882,] 0.293645732 0.0435030714 2.148300e-03
## [1883,] 0.440355309 0.2596967205 5.105149e-02
## [1884,] 0.169380014 0.0116813803 2.685375e-04
## [1885,] 0.407438488 0.3355375785 9.210835e-02
## [1886,] 0.204487093 0.0179374643 5.244873e-04
## [1887,] 0.424154946 0.3063341278 7.374710e-02
## [1888,] 0.090631399 0.0030210466 3.356718e-05
## [1889,] 0.430813836 0.2910904300 6.556091e-02
## [1890,] 0.407438488 0.3355375785 9.210835e-02
## [1891,] 0.417093250 0.1331148669 1.416116e-02
## [1892,] 0.179450170 0.4386559699 3.574234e-01
## [1893,] 0.444093854 0.2114732637 3.356718e-02
## [1894,] 0.407438488 0.3355375785 9.210835e-02
## [1895,] 0.163702964 0.4333313752 3.823512e-01
## [1896,] 0.243697761 0.4430868383 2.685375e-01
## [1897,] 0.204487093 0.0179374643 5.244873e-04
## [1898,] 0.362525595 0.3866939680 1.374912e-01
## [1899,] 0.433331375 0.1637029640 2.061445e-02
## [1900,] 0.444093854 0.2114732637 3.356718e-02
## [1901,] 0.438655970 0.1794501695 2.447048e-02
## [1902,] 0.406028666 0.1184250277 1.151354e-02
## [1903,] 0.440355309 0.2596967205 5.105149e-02
## [1904,] 0.293645732 0.0435030714 2.148300e-03
## [1905,] 0.293645732 0.0435030714 2.148300e-03
## [1906,] 0.266544426 0.0339238361 1.439193e-03
## [1907,] 0.243697761 0.4430868383 2.685375e-01
## [1908,] 0.259696720 0.4403553087 2.488965e-01
## [1909,] 0.377630828 0.0906313987 7.250512e-03
## [1910,] 0.424154946 0.3063341278 7.374710e-02
## [1911,] 0.360146521 0.0776786613 5.584740e-03
## [1912,] 0.349346279 0.3975319727 1.507880e-01
## [1913,] 0.442218287 0.1953987782 2.877966e-02
## [1914,] 0.104238696 0.3928997013 4.936432e-01
## [1915,] 0.426168977 0.1482326877 1.718640e-02
## [1916,] 0.362525595 0.3866939680 1.374912e-01
## [1917,] 0.444093854 0.2114732637 3.356718e-02
## [1918,] 0.291090430 0.4308138364 2.125348e-01
## [1919,] 0.444358195 0.2275981001 3.885821e-02
## [1920,] 0.306334128 0.4241549461 1.957638e-01
## [1921,] 0.375000000 0.3750000000 1.250000e-01
## [1922,] 0.444358195 0.2275981001 3.885821e-02
## [1923,] 0.406028666 0.1184250277 1.151354e-02
## [1924,] 0.397531973 0.3493462791 1.023338e-01
## [1925,] 0.443086838 0.2436977611 4.467792e-02
## [1926,] 0.349346279 0.3975319727 1.507880e-01
## [1927,] 0.340371253 0.0654560102 4.195898e-03
## [1928,] 0.291090430 0.4308138364 2.125348e-01
## [1929,] 0.424154946 0.3063341278 7.374710e-02
## [1930,] 0.377630828 0.0906313987 7.250512e-03
## [1931,] 0.443086838 0.2436977611 4.467792e-02
## [1932,] 0.375000000 0.3750000000 1.250000e-01
## [1933,] 0.430813836 0.2910904300 6.556091e-02
## [1934,] 0.424154946 0.3063341278 7.374710e-02
## [1935,] 0.406028666 0.1184250277 1.151354e-02
## [1936,] 0.426168977 0.1482326877 1.718640e-02
## [1937,] 0.438655970 0.1794501695 2.447048e-02
## [1938,] 0.349346279 0.3975319727 1.507880e-01
## [1939,] 0.211473264 0.4440938538 3.108657e-01
## [1940,] 0.438655970 0.1794501695 2.447048e-02
## [1941,] 0.440355309 0.2596967205 5.105149e-02
## [1942,] 0.275519452 0.4362391326 2.302373e-01
## [1943,] 0.424154946 0.3063341278 7.374710e-02
## [1944,] 0.416337988 0.3211750193 8.258786e-02
## [1945,] 0.266544426 0.0339238361 1.439193e-03
## [1946,] 0.335537578 0.4074384881 1.649156e-01
## [1947,] 0.377630828 0.0906313987 7.250512e-03
## [1948,] 0.360146521 0.0776786613 5.584740e-03
## [1949,] 0.204487093 0.0179374643 5.244873e-04
## [1950,] 0.386693968 0.3625255950 1.132892e-01
## [1951,] 0.424154946 0.3063341278 7.374710e-02
## [1952,] 0.349346279 0.3975319727 1.507880e-01
## [1953,] 0.438655970 0.1794501695 2.447048e-02
## [1954,] 0.204487093 0.0179374643 5.244873e-04
## [1955,] 0.349346279 0.3975319727 1.507880e-01
## [1956,] 0.397531973 0.3493462791 1.023338e-01
## [1957,] 0.426168977 0.1482326877 1.718640e-02
## [1958,] 0.426168977 0.1482326877 1.718640e-02
## [1959,] 0.430813836 0.2910904300 6.556091e-02
## [1960,] 0.430813836 0.2910904300 6.556091e-02
## [1961,] 0.227598100 0.4443581954 2.891855e-01
## [1962,] 0.321175019 0.4163379880 1.798991e-01
## [1963,] 0.090631399 0.0030210466 3.356718e-05
## [1964,] 0.443086838 0.2436977611 4.467792e-02
## [1965,] 0.386693968 0.3625255950 1.132892e-01
## [1966,] 0.430813836 0.2910904300 6.556091e-02
## [1967,] 0.275519452 0.4362391326 2.302373e-01
## [1968,] 0.291090430 0.4308138364 2.125348e-01
## [1969,] 0.444093854 0.2114732637 3.356718e-02
## [1970,] 0.335537578 0.4074384881 1.649156e-01
## [1971,] 0.443086838 0.2436977611 4.467792e-02
## [1972,] 0.360146521 0.0776786613 5.584740e-03
## [1973,] 0.444358195 0.2275981001 3.885821e-02
## [1974,] 0.362525595 0.3866939680 1.374912e-01
## [1975,] 0.362525595 0.3866939680 1.374912e-01
## [1976,] 0.259696720 0.4403553087 2.488965e-01
## [1977,] 0.377630828 0.0906313987 7.250512e-03
## [1978,] 0.275519452 0.4362391326 2.302373e-01
## [1979,] 0.104238696 0.3928997013 4.936432e-01
## [1980,] 0.349346279 0.3975319727 1.507880e-01
## [1981,] 0.416337988 0.3211750193 8.258786e-02
## [1982,] 0.306334128 0.4241549461 1.957638e-01
## [1983,] 0.204487093 0.0179374643 5.244873e-04
## [1984,] 0.025376792 0.2368500554 7.368668e-01
## [1985,] 0.442218287 0.1953987782 2.877966e-02
## [1986,] 0.291090430 0.4308138364 2.125348e-01
## [1987,] 0.266544426 0.0339238361 1.439193e-03
## [1988,] 0.118425028 0.4060286664 4.640328e-01
## [1989,] 0.163702964 0.4333313752 3.823512e-01
## [1990,] 0.424154946 0.3063341278 7.374710e-02
## [1991,] 0.406028666 0.1184250277 1.151354e-02
## [1992,] 0.430813836 0.2910904300 6.556091e-02
## [1993,] 0.442218287 0.1953987782 2.877966e-02
## [1994,] 0.293645732 0.0435030714 2.148300e-03
## [1995,] 0.444358195 0.2275981001 3.885821e-02
## [1996,] 0.416337988 0.3211750193 8.258786e-02
## [1997,] 0.443086838 0.2436977611 4.467792e-02
## [1998,] 0.349346279 0.3975319727 1.507880e-01
## [1999,] 0.430813836 0.2910904300 6.556091e-02
## [2000,] 0.335537578 0.4074384881 1.649156e-01
## [2001,] 0.362525595 0.3866939680 1.374912e-01
## [2002,] 0.306334128 0.4241549461 1.957638e-01
## [2003,] 0.340371253 0.0654560102 4.195898e-03
## [2004,] 0.340371253 0.0654560102 4.195898e-03
## [2005,] 0.293645732 0.0435030714 2.148300e-03
## [2006,] 0.416337988 0.3211750193 8.258786e-02
## [2007,] 0.033923836 0.2665444262 6.980925e-01
## [2008,] 0.392899701 0.1042386963 9.218388e-03
## [2009,] 0.443086838 0.2436977611 4.467792e-02
## [2010,] 0.444093854 0.2114732637 3.356718e-02
## [2011,] 0.436239133 0.2755194522 5.800410e-02
## [2012,] 0.362525595 0.3866939680 1.374912e-01
## [2013,] 0.349346279 0.3975319727 1.507880e-01
## [2014,] 0.443086838 0.2436977611 4.467792e-02
## [2015,] 0.266544426 0.0339238361 1.439193e-03
## [2016,] 0.397531973 0.3493462791 1.023338e-01
## [2017,] 0.104238696 0.3928997013 4.936432e-01
## [2018,] 0.424154946 0.3063341278 7.374710e-02
## [2019,] 0.417093250 0.1331148669 1.416116e-02
## [2020,] 0.360146521 0.0776786613 5.584740e-03
## [2021,] 0.318229499 0.0540389715 3.058810e-03
## [2022,] 0.443086838 0.2436977611 4.467792e-02
## [2023,] 0.438655970 0.1794501695 2.447048e-02
## [2024,] 0.386693968 0.3625255950 1.132892e-01
## [2025,] 0.321175019 0.4163379880 1.798991e-01
## [2026,] 0.444093854 0.2114732637 3.356718e-02
## [2027,] 0.065456010 0.3403712531 5.899768e-01
## [2028,] 0.236850055 0.0253767916 9.063140e-04
## [2029,] 0.169380014 0.0116813803 2.685375e-04
## [2030,] 0.360146521 0.0776786613 5.584740e-03
## [2031,] 0.444093854 0.2114732637 3.356718e-02
## [2032,] 0.054038972 0.3182294988 6.246727e-01
## [2033,] 0.406028666 0.1184250277 1.151354e-02
## [2034,] 0.406028666 0.1184250277 1.151354e-02
## [2035,] 0.417093250 0.1331148669 1.416116e-02
## [2036,] 0.438655970 0.1794501695 2.447048e-02
## [2037,] 0.407438488 0.3355375785 9.210835e-02
## [2038,] 0.227598100 0.4443581954 2.891855e-01
## [2039,] 0.377630828 0.0906313987 7.250512e-03
## [2040,] 0.306334128 0.4241549461 1.957638e-01
## [2041,] 0.392899701 0.1042386963 9.218388e-03
## [2042,] 0.426168977 0.1482326877 1.718640e-02
## [2043,] 0.397531973 0.3493462791 1.023338e-01
## [2044,] 0.360146521 0.0776786613 5.584740e-03
## [2045,] 0.243697761 0.4430868383 2.685375e-01
## [2046,] 0.440355309 0.2596967205 5.105149e-02
## [2047,] 0.275519452 0.4362391326 2.302373e-01
## [2048,] 0.335537578 0.4074384881 1.649156e-01
## [2049,] 0.321175019 0.4163379880 1.798991e-01
## [2050,] 0.442218287 0.1953987782 2.877966e-02
## [2051,] 0.433331375 0.1637029640 2.061445e-02
## [2052,] 0.443086838 0.2436977611 4.467792e-02
## [2053,] 0.306334128 0.4241549461 1.957638e-01
## [2054,] 0.442218287 0.1953987782 2.877966e-02
## [2055,] 0.444358195 0.2275981001 3.885821e-02
## [2056,] 0.397531973 0.3493462791 1.023338e-01
## [2057,] 0.349346279 0.3975319727 1.507880e-01
## [2058,] 0.397531973 0.3493462791 1.023338e-01
## [2059,] 0.340371253 0.0654560102 4.195898e-03
## [2060,] 0.133114867 0.4170932496 4.356307e-01
## [2061,] 0.436239133 0.2755194522 5.800410e-02
## [2062,] 0.243697761 0.4430868383 2.685375e-01
## [2063,] 0.375000000 0.3750000000 1.250000e-01
## [2064,] 0.424154946 0.3063341278 7.374710e-02
## [2065,] 0.386693968 0.3625255950 1.132892e-01
## [2066,] 0.436239133 0.2755194522 5.800410e-02
## [2067,] 0.377630828 0.0906313987 7.250512e-03
## [2068,] 0.392899701 0.1042386963 9.218388e-03
## [2069,] 0.360146521 0.0776786613 5.584740e-03
## [2070,] 0.442218287 0.1953987782 2.877966e-02
## [2071,] 0.275519452 0.4362391326 2.302373e-01
## [2072,] 0.424154946 0.3063341278 7.374710e-02
## [2073,] 0.266544426 0.0339238361 1.439193e-03
## [2074,] 0.392899701 0.1042386963 9.218388e-03
## [2075,] 0.349346279 0.3975319727 1.507880e-01
## [2076,] 0.266544426 0.0339238361 1.439193e-03
## [2077,] 0.362525595 0.3866939680 1.374912e-01
## [2078,] 0.377630828 0.0906313987 7.250512e-03
## [2079,] 0.443086838 0.2436977611 4.467792e-02
## [2080,] 0.426168977 0.1482326877 1.718640e-02
## [2081,] 0.436239133 0.2755194522 5.800410e-02
## [2082,] 0.377630828 0.0906313987 7.250512e-03
## [2083,] 0.293645732 0.0435030714 2.148300e-03
## [2084,] 0.360146521 0.0776786613 5.584740e-03
## [2085,] 0.306334128 0.4241549461 1.957638e-01
## [2086,] 0.349346279 0.3975319727 1.507880e-01
## [2087,] 0.375000000 0.3750000000 1.250000e-01
## [2088,] 0.321175019 0.4163379880 1.798991e-01
## [2089,] 0.443086838 0.2436977611 4.467792e-02
## [2090,] 0.335537578 0.4074384881 1.649156e-01
## [2091,] 0.275519452 0.4362391326 2.302373e-01
## [2092,] 0.377630828 0.0906313987 7.250512e-03
## [2093,] 0.349346279 0.3975319727 1.507880e-01
## [2094,] 0.406028666 0.1184250277 1.151354e-02
## [2095,] 0.362525595 0.3866939680 1.374912e-01
## [2096,] 0.293645732 0.0435030714 2.148300e-03
## [2097,] 0.392899701 0.1042386963 9.218388e-03
## [2098,] 0.392899701 0.1042386963 9.218388e-03
## [2099,] 0.424154946 0.3063341278 7.374710e-02
## [2100,] 0.377630828 0.0906313987 7.250512e-03
## [2101,] 0.318229499 0.0540389715 3.058810e-03
## [2102,] 0.291090430 0.4308138364 2.125348e-01
## attr(,"degree")
## [1] 3
## attr(,"knots")
## numeric(0)
## attr(,"Boundary.knots")
## [1] 18 80
## attr(,"intercept")
## [1] FALSE
## attr(,"class")
## [1] "bs"     "basis"  "matrix"

Notes and further reading

  • Level 1 feature creation (raw data to covariates)
  • Science is key. Google “feature extraction for [data type]”
  • Err on overcreation of features
  • In some applications (images, voices) automated feature creation is possible/necessary
  • Level 2 feature creation (covariates to new covariates)
  • The function preProcess in caret will handle some preprocessing.
  • Create new covariates if you think they will improve fit
  • Use exploratory analysis on the training set for creating them
  • Be careful about overfitting!
  • preprocessing with caret
  • If you want to fit spline models, use the gam method in the caret package which allows smoothing of multiple variables.
  • More on feature creation/data tidying in the Obtaining Data course from the Data Science course track.

Preprocessing with Principal Components Analysis (PCA)

Correlated predictors

# Load caret (partitioning/modelling) and kernlab, then the `spam` data set.
library(caret); library(kernlab); data(spam)
# Random 75/25 split on `type`. No seed is set, so the sampled rows (and the
# printed output below) vary from run to run.
inTrain <- createDataPartition(y = spam$type, p = 0.75, list = FALSE)
training <- spam[inTrain, ]
testing <- spam[-inTrain, ]

# Absolute pairwise correlations with column 58 dropped (presumably the `type`
# outcome -- confirm). The diagonal is zeroed so a variable's correlation with
# itself is not reported.
M <- abs(cor(training[, -58]))
diag(M) <- 0
# TRUE instead of T: T is an ordinary variable that can be reassigned.
which(M > 0.8, arr.ind = TRUE)
##        row col
## num415  34  32
## direct  40  32
## num857  32  34
## direct  40  34
## num857  32  40
## num415  34  40

Correlated predictors

names(spam)[c(34,32)]
## [1] "num415" "num857"
plot(spam[,34],spam[,32])


Basic PCA idea

  • We might not need every predictor
  • A weighted combination of predictors might be better
  • We should pick this combination to capture the “most information” possible
  • Benefits
  • Reduced number of predictors
  • Reduced noise (due to averaging)

We could rotate the plot

\[ X = 0.71 \times {\rm num415} + 0.71 \times {\rm num857}\]

\[ Y = 0.71 \times {\rm num415} - 0.71 \times {\rm num857}\]

X <- 0.71*training$num415 + 0.71*training$num857
Y <- 0.71*training$num415 - 0.71*training$num857
plot(X,Y)


Principal components in R - prcomp

smallSpam <- spam[,c(34,32)]
prComp <- prcomp(smallSpam)
plot(prComp$x[,1],prComp$x[,2])


Principal components in R - prcomp

prComp$rotation
##              PC1        PC2
## num415 0.7080625  0.7061498
## num857 0.7061498 -0.7080625

PCA on SPAM data

typeColor <- ((spam$type=="spam")*1 + 1)
prComp <- prcomp(log10(spam[,-58]+1))
plot(prComp$x[,1],prComp$x[,2],col=typeColor,xlab="PC1",ylab="PC2")


PCA with caret

preProc <- preProcess(log10(spam[,-58]+1),method="pca",pcaComp=2)
spamPC <- predict(preProc,log10(spam[,-58]+1))
plot(spamPC[,1],spamPC[,2],col=typeColor)


Preprocessing with PCA

# Estimate the 2-component PCA on the log-transformed training predictors only,
# then project the training set onto those components.
preProc <- preProcess(log10(training[,-58]+1),method="pca",pcaComp=2)
trainPC <- predict(preProc,log10(training[,-58]+1))
# Use train()'s x/y interface. The formula `training$type ~ .` took its response
# from outside `data = trainPC`, which train()'s formula method cannot resolve
# against `data` (recent caret releases are reported to fail on it).
modelFit <- train(x = trainPC, y = training$type, method = "glm")

Preprocessing with PCA

# Project the test set with the PCA estimated on the training set.
testPC <- predict(preProc,log10(testing[,-58]+1))
# confusionMatrix(data = predictions, reference = truth): predictions go first,
# so that "Prediction" and "Reference" in the table mean what they say.
# The output below is the previously recorded table with the two roles swapped
# accordingly (same four cell counts, statistics recomputed from them).
confusionMatrix(predict(modelFit,testPC),testing$type)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction nonspam spam
##    nonspam     645   73
##    spam         52  380
##                                           
##                Accuracy : 0.8913          
##                  95% CI : (0.8719, 0.9087)
##     No Information Rate : 0.6061          
##     P-Value [Acc > NIR] : < 2e-16         
##                                           
##                   Kappa : 0.7705          
##  Mcnemar's Test P-Value : 0.07364         
##                                           
##             Sensitivity : 0.9254          
##             Specificity : 0.8389          
##          Pos Pred Value : 0.8983          
##          Neg Pred Value : 0.8796          
##              Prevalence : 0.6061          
##          Detection Rate : 0.5609          
##    Detection Prevalence : 0.6243          
##       Balanced Accuracy : 0.8821          
##                                           
##        'Positive' Class : nonspam         
## 

Alternative (sets # of PCs)

# Alternative: let train() run the PCA step itself via preProcess = "pca".
# NOTE(review): the response is spelled `training$type`, so `.` presumably still
# expands to every column of `training`, `type` included -- `type ~ .` looks
# like what was intended; confirm and re-run before relying on the numbers below.
modelFit <- train(training$type ~ .,method="glm",preProcess="pca",data=training)
# NOTE(review): confusionMatrix() is documented as (data = predictions,
# reference = truth); here the observed classes are passed first, so the
# "Prediction"/"Reference" labels in the printed table are swapped.
confusionMatrix(testing$type,predict(modelFit,testing))
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction nonspam spam
##    nonspam     658   39
##    spam         50  403
##                                           
##                Accuracy : 0.9226          
##                  95% CI : (0.9056, 0.9374)
##     No Information Rate : 0.6157          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.8372          
##  Mcnemar's Test P-Value : 0.2891          
##                                           
##             Sensitivity : 0.9294          
##             Specificity : 0.9118          
##          Pos Pred Value : 0.9440          
##          Neg Pred Value : 0.8896          
##              Prevalence : 0.6157          
##          Detection Rate : 0.5722          
##    Detection Prevalence : 0.6061          
##       Balanced Accuracy : 0.9206          
##                                           
##        'Positive' Class : nonspam         
## 

Final thoughts on PCs

  • Most useful for linear-type models
  • Can make it harder to interpret predictors
  • Watch out for outliers!
  • Transform first (with logs/Box Cox)
  • Plot predictors to identify problems
  • For more info see
  • Exploratory Data Analysis
  • Elements of Statistical Learning

Predicting with regression

Key ideas

  • Fit a simple regression model
  • Plug in new covariates and multiply by the coefficients
  • Useful when the linear model is (nearly) correct

Pros:

  • Easy to implement
  • Easy to interpret

Cons:

  • Often poor performance in nonlinear settings


Example: Old faithful eruptions

Image Credit/Copyright Wally Pacholka http://www.astropics.com/


Example: Old faithful eruptions

library(caret);data(faithful); set.seed(333)
inTrain <- createDataPartition(y=faithful$waiting,
                              p=0.5, list=FALSE)
trainFaith <- faithful[inTrain,]; testFaith <- faithful[-inTrain,]
head(trainFaith)
##   eruptions waiting
## 1     3.600      79
## 3     3.333      74
## 5     4.533      85
## 6     2.883      55
## 7     4.700      88
## 8     3.600      85

Eruption duration versus waiting time

plot(trainFaith$waiting,trainFaith$eruptions,pch=19,col="blue",xlab="Waiting",ylab="Duration")


Fit a linear model

\[ ED_i = b_0 + b_1 WT_i + e_i \]

lm1 <- lm(eruptions ~ waiting,data=trainFaith)
summary(lm1)
## 
## Call:
## lm(formula = eruptions ~ waiting, data = trainFaith)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.26990 -0.34789  0.03979  0.36589  1.05020 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.792739   0.227869  -7.867 1.04e-12 ***
## waiting      0.073901   0.003148  23.474  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.495 on 135 degrees of freedom
## Multiple R-squared:  0.8032, Adjusted R-squared:  0.8018 
## F-statistic:   551 on 1 and 135 DF,  p-value: < 2.2e-16

Model fit

# Scatter plot of the training data with the fitted regression line on top.
plot(trainFaith$waiting,trainFaith$eruptions,pch=19,col="blue",xlab="Waiting",ylab="Duration")
# fitted(lm1) instead of lm1$fitted: the component is named `fitted.values`,
# so `$fitted` only resolves through partial matching.
lines(trainFaith$waiting,fitted(lm1),lwd=3)


Predict a new value

\[\hat{ED} = \hat{b}_0 + \hat{b}_1 WT\]

coef(lm1)[1] + coef(lm1)[2]*80
## (Intercept) 
##    4.119307
newdata <- data.frame(waiting=80)
predict(lm1,newdata)
##        1 
## 4.119307

Plot predictions - training and test

# Two panels side by side: the fitted line over the training data (left) and
# the same model's predictions over the test data (right).
par(mfrow = c(1, 2))
with(trainFaith, {
  plot(waiting, eruptions, pch = 19, col = "blue",
       xlab = "Waiting", ylab = "Duration")
  lines(waiting, predict(lm1), lwd = 3)
})
with(testFaith, {
  plot(waiting, eruptions, pch = 19, col = "blue",
       xlab = "Waiting", ylab = "Duration")
  lines(waiting, predict(lm1, newdata = testFaith), lwd = 3)
})


Get training set/test set errors

# Calculate RMSE on training: root of the MEAN squared error. Using sum() here
# gives the root of the SSE instead, which grows with the number of rows and is
# not comparable between sets of different size.
sqrt(mean((fitted(lm1)-trainFaith$eruptions)^2))
## [1] 0.4914  (approx.: the earlier sum-based value 5.75186 / sqrt(137 training rows))
# Calculate RMSE on test
sqrt(mean((predict(lm1,newdata=testFaith)-testFaith$eruptions)^2))
## [1] 0.5025  (approx.: the earlier sum-based value 5.838559 / sqrt(135 test rows))

Prediction intervals

pred1 <- predict(lm1,newdata=testFaith,interval="prediction")
ord <- order(testFaith$waiting)
plot(testFaith$waiting,testFaith$eruptions,pch=19,col="blue")
matlines(testFaith$waiting[ord],pred1[ord,],type="l",col=c(1,2,2),lty = c(1,1,1), lwd=3)


Same process with caret

modFit <- train(eruptions ~ waiting,data=trainFaith,method="lm")
summary(modFit$finalModel)
## 
## Call:
## lm(formula = .outcome ~ ., data = dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.26990 -0.34789  0.03979  0.36589  1.05020 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.792739   0.227869  -7.867 1.04e-12 ***
## waiting      0.073901   0.003148  23.474  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.495 on 135 degrees of freedom
## Multiple R-squared:  0.8032, Adjusted R-squared:  0.8018 
## F-statistic:   551 on 1 and 135 DF,  p-value: < 2.2e-16

Notes and further reading


Predicting with regression, multiple covariates

Example: predicting wages

Image Credit http://www.cahs-media.org/the-high-cost-of-low-wages

Data from: ISLR package from the book: Introduction to statistical learning


Example: Wage data

library(ISLR); library(ggplot2); library(caret);
data(Wage); Wage <- subset(Wage,select=-c(logwage))
summary(Wage)
##       year           age               sex                    maritl    
##  Min.   :2003   Min.   :18.00   1. Male  :3000   1. Never Married: 648  
##  1st Qu.:2004   1st Qu.:33.75   2. Female:   0   2. Married      :2074  
##  Median :2006   Median :42.00                    3. Widowed      :  19  
##  Mean   :2006   Mean   :42.41                    4. Divorced     : 204  
##  3rd Qu.:2008   3rd Qu.:51.00                    5. Separated    :  55  
##  Max.   :2009   Max.   :80.00                                           
##                                                                         
##        race                   education                     region    
##  1. White:2480   1. < HS Grad      :268   2. Middle Atlantic   :3000  
##  2. Black: 293   2. HS Grad        :971   1. New England       :   0  
##  3. Asian: 190   3. Some College   :650   3. East North Central:   0  
##  4. Other:  37   4. College Grad   :685   4. West North Central:   0  
##                  5. Advanced Degree:426   5. South Atlantic    :   0  
##                                           6. East South Central:   0  
##                                           (Other)              :   0  
##            jobclass               health      health_ins  
##  1. Industrial :1544   1. <=Good     : 858   1. Yes:2083  
##  2. Information:1456   2. >=Very Good:2142   2. No : 917  
##                                                           
##                                                           
##                                                           
##                                                           
##                                                           
##       wage       
##  Min.   : 20.09  
##  1st Qu.: 85.38  
##  Median :104.92  
##  Mean   :111.70  
##  3rd Qu.:128.68  
##  Max.   :318.34  
## 

Get training/test sets

inTrain <- createDataPartition(y=Wage$wage,
                              p=0.7, list=FALSE)
training <- Wage[inTrain,]; testing <- Wage[-inTrain,]
dim(training); dim(testing)
## [1] 2102   11
## [1] 898  11

Feature plot

featurePlot(x=training[,c("age","education","jobclass")],
            y = training$wage,
            plot="pairs")


Plot age versus wage

qplot(age,wage,data=training)


Plot age versus wage colour by jobclass

qplot(age,wage,colour=jobclass,data=training)


Plot age versus wage colour by education

qplot(age,wage,colour=education,data=training)


Fit a linear model

\[ wage_i = b_0 + b_1 age_i + b_2 I(Jobclass_i="Information") + \sum_{k=1}^4 \gamma_k I(education_i = level \; k) + e_i \]

modFit<- train(wage ~ age + jobclass + education,
               method = "lm",data=training)
finMod <- modFit$finalModel
print(modFit)
## Linear Regression 
## 
## 2102 samples
##   10 predictor
## 
## No pre-processing
## Resampling: Bootstrapped (25 reps) 
## Summary of sample sizes: 2102, 2102, 2102, 2102, 2102, 2102, ... 
## Resampling results
## 
##   RMSE      Rsquared  RMSE SD   Rsquared SD
##   36.43771  0.267012  1.281502  0.01740793 
## 
## 

Education levels: 1 = HS Grad, 2 = Some College, 3 = College Grad, 4 = Advanced Degree


Diagnostics

plot(finMod,1,pch=19,cex=0.5,col="#00000010")


Color by variables not used in the model

# Residuals vs. fitted values, coloured by `race`.
# fitted()/residuals() replace finMod$fitted, which only resolved to the
# `fitted.values` component through partial matching.
qplot(fitted(finMod),residuals(finMod),colour=race,data=training)


Plot by index

plot(finMod$residuals,pch=19)


Predicted versus truth in test set

pred <- predict(modFit, testing)
qplot(wage,pred,colour=year,data=testing)


If you want to use all covariates

modFitAll<- train(wage ~ .,data=training,method="lm")
pred <- predict(modFitAll, testing)
qplot(wage,pred,data=testing)


Notes and further reading


Predicting with trees

Key ideas

  • Iteratively split variables into groups
  • Evaluate “homogeneity” within each group
  • Split again if necessary

Pros:

  • Easy to interpret
  • Better performance in nonlinear settings

Cons:

  • Without pruning/cross-validation can lead to overfitting
  • Harder to estimate uncertainty
  • Results may be variable

Basic algorithm

  1. Start with all variables in one group
  2. Find the variable/split that best separates the outcomes
  3. Divide the data into two groups (“leaves”) on that split (“node”)
  4. Within each split, find the best variable/split that separates the outcomes
  5. Continue until the groups are too small or sufficiently “pure”

Measures of impurity

\[\hat{p}_{mk} = \frac{1}{N_m}\sum_{x_i\; in \; Leaf \; m}\mathbb{1}(y_i = k)\]

Misclassification Error: \[ 1 - \hat{p}_{m k(m)}; \quad k(m) = {\rm most \; common \; k}\]

  • 0 = perfect purity
  • 0.5 = no purity

Gini index: \[ \sum_{k \neq k'} \hat{p}_{mk} \times \hat{p}_{mk'} = \sum_{k=1}^K \hat{p}_{mk}(1-\hat{p}_{mk}) = 1 - \sum_{k=1}^K \hat{p}_{mk}^2\]

  • 0 = perfect purity
  • 0.5 = no purity

http://en.wikipedia.org/wiki/Decision_tree_learning


Measures of impurity

Deviance/information gain:

\[ -\sum_{k=1}^K \hat{p}_{mk} \log_2\hat{p}_{mk} \]

  • 0 = perfect purity
  • 1 = no purity

http://en.wikipedia.org/wiki/Decision_tree_learning

— &twocol w1:50% w2:50%

Measures of impurity

*** =left

  • Misclassification: \(1/16 = 0.06\)
  • Gini: \(1 - [(1/16)^2 + (15/16)^2] = 0.12\)
  • Information:\(-[1/16 \times log2(1/16) + 15/16 \times log2(15/16)] = 0.34\)

*** =right

  • Misclassification: \(8/16 = 0.5\)
  • Gini: \(1 - [(8/16)^2 + (8/16)^2] = 0.5\)
  • Information:\(-[8/16 \times log2(8/16) + 8/16 \times log2(8/16)] = 1\)

Example: Iris Data

data(iris); library(ggplot2)
names(iris)
## [1] "Sepal.Length" "Sepal.Width"  "Petal.Length" "Petal.Width" 
## [5] "Species"
table(iris$Species)
## 
##     setosa versicolor  virginica 
##         50         50         50

Create training and test sets

library(caret)
inTrain <- createDataPartition(y=iris$Species,
                              p=0.7, list=FALSE)
training <- iris[inTrain,]
testing <- iris[-inTrain,]
dim(training); dim(testing)
## [1] 105   5
## [1] 45  5

Iris petal widths/sepal width

library(ggplot2)
qplot(Petal.Width,Sepal.Width,colour=Species,data=training)


Iris petal widths/sepal width

library(caret)
modFit <- train(Species ~ .,method="rpart",data=training)
print(modFit$finalModel)
## n= 105 
## 
## node), split, n, loss, yval, (yprob)
##       * denotes terminal node
## 
## 1) root 105 70 setosa (0.33333333 0.33333333 0.33333333)  
##   2) Petal.Length< 2.45 35  0 setosa (1.00000000 0.00000000 0.00000000) *
##   3) Petal.Length>=2.45 70 35 versicolor (0.00000000 0.50000000 0.50000000)  
##     6) Petal.Width< 1.65 34  1 versicolor (0.00000000 0.97058824 0.02941176) *
##     7) Petal.Width>=1.65 36  2 virginica (0.00000000 0.05555556 0.94444444) *

Plot tree

plot(modFit$finalModel, uniform=TRUE, 
      main="Classification Tree")
text(modFit$finalModel, use.n=TRUE, all=TRUE, cex=.8)


Prettier plots

library(rattle)
## Rattle: A free graphical interface for data mining with R.
## Version 4.1.0 Copyright (c) 2006-2015 Togaware Pty Ltd.
## Type 'rattle()' to shake, rattle, and roll your data.
fancyRpartPlot(modFit$finalModel)


Predicting new values

predict(modFit,newdata=testing)
##  [1] setosa     setosa     setosa     setosa     setosa     setosa    
##  [7] setosa     setosa     setosa     setosa     setosa     setosa    
## [13] setosa     setosa     setosa     versicolor versicolor versicolor
## [19] versicolor versicolor versicolor versicolor versicolor versicolor
## [25] versicolor versicolor versicolor versicolor versicolor versicolor
## [31] virginica  virginica  virginica  virginica  virginica  virginica 
## [37] versicolor virginica  virginica  versicolor versicolor virginica 
## [43] virginica  virginica  virginica 
## Levels: setosa versicolor virginica

Notes and further resources


Bagging

Bootstrap aggregating (bagging)

Basic idea:

  1. Resample cases and recalculate predictions
  2. Average or majority vote

Notes:

  • Similar bias
  • Reduced variance
  • More useful for non-linear functions

Ozone data

library(ElemStatLearn); data(ozone,package="ElemStatLearn")
## 
## Attaching package: 'ElemStatLearn'
## The following object is masked _by_ '.GlobalEnv':
## 
##     spam
## The following object is masked from 'package:maps':
## 
##     ozone
ozone <- ozone[order(ozone$ozone),]
head(ozone)
##     ozone radiation temperature wind
## 17      1         8          59  9.7
## 19      4        25          61  9.7
## 14      6        78          57 18.4
## 45      7        48          80 14.3
## 106     7        49          69 10.3
## 7       8        19          61 20.1

http://en.wikipedia.org/wiki/Bootstrap_aggregating


Bagged loess

# Bagged loess: fit a loess smoother to 10 bootstrap resamples of `ozone` and
# store each fitted curve, evaluated at ozone = 1..155, as one row of `ll`.
ll <- matrix(NA, nrow = 10, ncol = 155)
for (i in seq_len(10)) {
  # Bootstrap resample: row indices drawn with replacement. seq_len(nrow())
  # replaces 1:dim(ozone)[1], and TRUE replaces the reassignable shorthand T.
  ss <- sample(seq_len(nrow(ozone)), replace = TRUE)
  ozone0 <- ozone[ss, ]
  ozone0 <- ozone0[order(ozone0$ozone), ]
  loess0 <- loess(temperature ~ ozone, data = ozone0, span = 0.2)
  ll[i, ] <- predict(loess0, newdata = data.frame(ozone = 1:155))
}

Bagged loess

plot(ozone$ozone,ozone$temperature,pch=19,cex=0.5)
for(i in 1:10){lines(1:155,ll[i,],col="grey",lwd=2)}
lines(1:155,apply(ll,2,mean),col="red",lwd=2)


Bagging in caret

  • Some models perform bagging for you, in train function consider method options
  • bagEarth
  • treebag
  • bagFDA
  • Alternatively you can bag any model you choose using the bag function

More bagging in caret

library(caret)
# One-column predictor data frame and the outcome vector handed to bag().
# `<-` replaces `=` for assignment.
predictors <- data.frame(ozone=ozone$ozone)
temperature <- ozone$temperature
# Bag B = 10 models using the fit / predict / aggregate functions that caret
# bundles in `ctreeBag`.
treebag <- bag(predictors, temperature, B = 10,
                bagControl = bagControl(fit = ctreeBag$fit,
                                        predict = ctreeBag$pred,
                                        aggregate = ctreeBag$aggregate))
## Warning: executing %dopar% sequentially: no parallel backend registered

http://www.inside-r.org/packages/cran/caret/docs/nbBag


Example of custom bagging (continued)

plot(ozone$ozone,temperature,col='lightgrey',pch=19)
points(ozone$ozone,predict(treebag$fits[[1]]$fit,predictors),pch=19,col="red")
points(ozone$ozone,predict(treebag,predictors),pch=19,col="blue")


Parts of bagging

ctreeBag$fit
## function (x, y, ...) 
## {
##     loadNamespace("party")
##     data <- as.data.frame(x)
##     data$y <- y
##     party::ctree(y ~ ., data = data)
## }
## <environment: namespace:caret>

Parts of bagging

ctreeBag$pred
## function (object, x) 
## {
##     if (!is.data.frame(x)) 
##         x <- as.data.frame(x)
##     obsLevels <- levels(object@data@get("response")[, 1])
##     if (!is.null(obsLevels)) {
##         rawProbs <- party::treeresponse(object, x)
##         probMatrix <- matrix(unlist(rawProbs), ncol = length(obsLevels), 
##             byrow = TRUE)
##         out <- data.frame(probMatrix)
##         colnames(out) <- obsLevels
##         rownames(out) <- NULL
##     }
##     else out <- unlist(party::treeresponse(object, x))
##     out
## }
## <environment: namespace:caret>

Parts of bagging

ctreeBag$aggregate
## function (x, type = "class") 
## {
##     if (is.matrix(x[[1]]) | is.data.frame(x[[1]])) {
##         pooled <- x[[1]] & NA
##         classes <- colnames(pooled)
##         for (i in 1:ncol(pooled)) {
##             tmp <- lapply(x, function(y, col) y[, col], col = i)
##             tmp <- do.call("rbind", tmp)
##             pooled[, i] <- apply(tmp, 2, median)
##         }
##         if (type == "class") {
##             out <- factor(classes[apply(pooled, 1, which.max)], 
##                 levels = classes)
##         }
##         else out <- as.data.frame(pooled)
##     }
##     else {
##         x <- matrix(unlist(x), ncol = length(x))
##         out <- apply(x, 1, median)
##     }
##     out
## }
## <environment: namespace:caret>

Notes and further resources

Notes:

  • Bagging is most useful for nonlinear models
  • Often used with trees - an extension is random forests
  • Several models use bagging in caret’s train function

Further resources:


Random forests

Random forests

  1. Bootstrap samples
  2. At each split, bootstrap variables
  3. Grow multiple trees and vote

Pros:

  1. Accuracy

Cons:

  1. Speed
  2. Interpretability
  3. Overfitting

Iris data

data(iris); library(ggplot2); library(caret)
inTrain <- createDataPartition(y=iris$Species,
                              p=0.7, list=FALSE)
training <- iris[inTrain,]
testing <- iris[-inTrain,]

Random forests

library(caret)
library(randomForest)
modFit <- train(Species~ .,data=training,method="rf",prox=TRUE)
modFit
## Random Forest 
## 
## 105 samples
##   4 predictor
##   3 classes: 'setosa', 'versicolor', 'virginica' 
## 
## No pre-processing
## Resampling: Bootstrapped (25 reps) 
## Summary of sample sizes: 105, 105, 105, 105, 105, 105, ... 
## Resampling results across tuning parameters:
## 
##   mtry  Accuracy   Kappa      Accuracy SD  Kappa SD  
##   2     0.9676266  0.9509618  0.02216012   0.03352933
##   3     0.9658109  0.9482572  0.02654861   0.04007451
##   4     0.9668419  0.9497665  0.02643653   0.04005047
## 
## Accuracy was used to select the optimal model using  the largest value.
## The final value used for the model was mtry = 2.

Getting a single tree

library(randomForest)
getTree(modFit$finalModel,k=2)
##    left daughter right daughter split var split point status prediction
## 1              2              3         4        0.75      1          0
## 2              0              0         0        0.00     -1          1
## 3              4              5         3        4.75      1          0
## 4              0              0         0        0.00     -1          2
## 5              6              7         1        6.05      1          0
## 6              8              9         4        1.75      1          0
## 7             10             11         2        2.85      1          0
## 8             12             13         2        2.45      1          0
## 9              0              0         0        0.00     -1          3
## 10            14             15         4        1.60      1          0
## 11             0              0         0        0.00     -1          3
## 12             0              0         0        0.00     -1          3
## 13             0              0         0        0.00     -1          2
## 14            16             17         1        6.45      1          0
## 15             0              0         0        0.00     -1          3
## 16             0              0         0        0.00     -1          3
## 17             0              0         0        0.00     -1          2

Class “centers”

# Class centers in the (Petal.Length, Petal.Width) plane, computed from the
# forest's proximity matrix. The component is named `proximity`; `$prox` only
# worked through partial matching.
irisP <- classCenter(training[,c(3,4)], training$Species, modFit$finalModel$proximity)
# The row names of the centers become the Species column so that ggplot can
# colour them.
irisP <- as.data.frame(irisP); irisP$Species <- rownames(irisP)
p <- qplot(Petal.Width, Petal.Length, col=Species,data=training)
# Overlay the centers as large crosses on top of the training points.
p + geom_point(aes(x=Petal.Width,y=Petal.Length,col=Species),size=5,shape=4,data=irisP)


Predicting new values

pred <- predict(modFit,testing); testing$predRight <- pred==testing$Species
table(pred,testing$Species)
##             
## pred         setosa versicolor virginica
##   setosa         15          0         0
##   versicolor      0         13         2
##   virginica       0          2        13

Predicting new values

qplot(Petal.Width,Petal.Length,colour=predRight,data=testing,main="newdata Predictions")


Notes and further resources

Notes:

  • Random forests are usually one of the two top performing algorithms along with boosting in prediction contests.
  • Random forests are difficult to interpret but often very accurate.
  • Care should be taken to avoid overfitting (see rfcv function)

Further resources:


Boosting

Basic idea

  1. Take lots of (possibly) weak predictors
  2. Weight them and add them up
  3. Get a stronger predictor

Basic idea behind boosting

  1. Start with a set of classifiers \(h_1,\ldots,h_k\)
  • Examples: All possible trees, all possible regression models, all possible cutoffs.
  2. Create a classifier that combines classification functions: \(f(x) = \rm{sgn}\left(\sum_{t=1}^T \alpha_t h_t(x)\right)\).
  • Goal is to minimize error (on training set)
  • Iterative, select one \(h\) at each step
  • Calculate weights based on errors
  • Upweight missed classifications and select next \(h\)

Adaboost on Wikipedia

http://webee.technion.ac.il/people/rmeir/BoostingTutorial.pdf


Boosting in R

  • Boosting can be used with any subset of classifiers
  • One large subclass is gradient boosting
  • R has multiple boosting libraries. Differences include the choice of basic classification functions and combination rules.
  • gbm - boosting with trees.
  • mboost - model based boosting
  • ada - statistical boosting based on additive logistic regression
  • gamBoost for boosting generalized additive models
  • Most of these are available in the caret package

Wage example

library(ISLR); data(Wage); library(ggplot2); library(caret);
Wage <- subset(Wage,select=-c(logwage))
inTrain <- createDataPartition(y=Wage$wage,
                              p=0.7, list=FALSE)
training <- Wage[inTrain,]; testing <- Wage[-inTrain,]

Fit the model

modFit <- train(wage ~ ., method="gbm",data=training,verbose=FALSE)
print(modFit)
## Stochastic Gradient Boosting 
## 
## 2102 samples
##   10 predictor
## 
## No pre-processing
## Resampling: Bootstrapped (25 reps) 
## Summary of sample sizes: 2102, 2102, 2102, 2102, 2102, 2102, ... 
## Resampling results across tuning parameters:
## 
##   interaction.depth  n.trees  RMSE      Rsquared   RMSE SD   Rsquared SD
##   1                   50      35.66577  0.2999192  1.575565  0.02337561 
##   1                  100      35.11476  0.3111799  1.481016  0.02157929 
##   1                  150      35.02718  0.3137694  1.438008  0.02062790 
##   2                   50      35.04157  0.3148807  1.500431  0.02285973 
##   2                  100      34.88069  0.3195858  1.452622  0.02054340 
##   2                  150      34.93116  0.3181132  1.448111  0.01943547 
##   3                   50      34.88720  0.3196563  1.479245  0.02258544 
##   3                  100      34.95639  0.3172195  1.451944  0.02100039 
##   3                  150      35.16021  0.3111064  1.447594  0.02022995 
## 
## Tuning parameter 'shrinkage' was held constant at a value of 0.1
## 
## Tuning parameter 'n.minobsinnode' was held constant at a value of 10
## RMSE was used to select the optimal model using  the smallest value.
## The final values used for the model were n.trees = 100,
##  interaction.depth = 2, shrinkage = 0.1 and n.minobsinnode = 10.

Plot the results

qplot(predict(modFit,testing),wage,data=testing)


Model based prediction

Basic idea

  1. Assume the data follow a probabilistic model
  2. Use Bayes’ theorem to identify optimal classifiers

Pros:

  • Can take advantage of structure of the data
  • May be computationally convenient
  • Are reasonably accurate on real problems

Cons:

  • Make additional assumptions about the data
  • When the model is incorrect you may get reduced accuracy

Model based approach

  1. Our goal is to build a parametric model for the conditional distribution \(P(Y = k | X = x)\)

  2. A typical approach is to apply Bayes theorem: \[ Pr(Y = k | X=x) = \frac{Pr(X=x|Y=k)Pr(Y=k)}{\sum_{\ell=1}^K Pr(X=x |Y = \ell) Pr(Y=\ell)}\] \[Pr(Y = k | X=x) = \frac{f_k(x) \pi_k}{\sum_{\ell = 1}^K f_{\ell}(x) \pi_{\ell}}\]

  3. Typically prior probabilities \(\pi_k\) are set in advance.

  4. A common choice is \(f_k(x) = \frac{1}{\sigma_k \sqrt{2 \pi}}e^{-\frac{(x-\mu_k)^2}{2\sigma_k^2}}\), a Gaussian distribution

  5. Estimate the parameters (\(\mu_k\),\(\sigma_k^2\)) from the data.

  6. Classify to the class with the highest value of \(P(Y = k | X = x)\)


Classifying using the model

A range of models use this approach

  • Linear discriminant analysis assumes \(f_k(x)\) is multivariate Gaussian with same covariances
  • Quadratic discriminant analysis assumes \(f_k(x)\) is multivariate Gaussian with different covariances
  • Model based prediction assumes more complicated versions for the covariance matrix
  • Naive Bayes assumes independence between features for model building

http://statweb.stanford.edu/~tibs/ElemStatLearn/


Why linear discriminant analysis?

\[\log \frac{Pr(Y = k | X=x)}{Pr(Y = j | X=x)}\] \[ = \log \frac{f_k(x)}{f_j(x)} + \log \frac{\pi_k}{\pi_j}\] \[ = \log \frac{\pi_k}{\pi_j} - \frac{1}{2}(\mu_k + \mu_j)^T \Sigma^{-1}(\mu_k - \mu_j)\] \[ + x^T \Sigma^{-1} (\mu_k - \mu_j)\]

http://statweb.stanford.edu/~tibs/ElemStatLearn/


Decision boundaries


Discriminant function

\[\delta_k(x) = x^T \Sigma^{-1} \mu_k - \frac{1}{2}\mu_k^T \Sigma^{-1}\mu_k + \log(\pi_k)\]

  • Decide on class based on \(\hat{Y}(x) = argmax_k \delta_k(x)\)
  • We usually estimate parameters with maximum likelihood

Naive Bayes

Suppose we have many predictors, we would want to model: \(P(Y = k | X_1,\ldots,X_m)\)

We could use Bayes Theorem to get:

\[P(Y = k | X_1,\ldots,X_m) = \frac{\pi_k P(X_1,\ldots,X_m| Y=k)}{\sum_{\ell = 1}^K P(X_1,\ldots,X_m | Y=\ell) \pi_{\ell}}\] \[ \propto \pi_k P(X_1,\ldots,X_m| Y=k)\]

This can be written:

\[P(X_1,\ldots,X_m, Y=k) = \pi_k P(X_1 | Y = k)P(X_2,\ldots,X_m | X_1,Y=k)\] \[ = \pi_k P(X_1 | Y = k) P(X_2 | X_1, Y=k) P(X_3,\ldots,X_m | X_1,X_2, Y=k)\] \[ = \pi_k P(X_1 | Y = k) P(X_2 | X_1, Y=k)\ldots P(X_m|X_1\ldots,X_{m-1},Y=k)\]

We could make an assumption to write this:

\[ \approx \pi_k P(X_1 | Y = k) P(X_2 | Y = k)\ldots P(X_m | Y=k)\]


Example: Iris Data

data(iris); library(ggplot2)
names(iris)
## [1] "Sepal.Length" "Sepal.Width"  "Petal.Length" "Petal.Width" 
## [5] "Species"
table(iris$Species)
## 
##     setosa versicolor  virginica 
##         50         50         50

Create training and test sets

library(caret)
inTrain <- createDataPartition(y=iris$Species,
                              p=0.7, list=FALSE)
training <- iris[inTrain,]
testing <- iris[-inTrain,]
dim(training); dim(testing)
## [1] 105   5
## [1] 45  5

Build predictions

library(klaR); library(MASS)
# Fit linear discriminant analysis and naive Bayes on the same training set.
# `<-` replaces `=` for assignment.
modlda <- train(Species ~ .,data=training,method="lda")
modnb <- train(Species ~ ., data=training,method="nb")
# Predict the test set with both models and cross-tabulate the two sets of
# predictions against each other.
plda <- predict(modlda,testing); pnb <- predict(modnb,testing)
table(plda,pnb)
##             pnb
## plda         setosa versicolor virginica
##   setosa         15          0         0
##   versicolor      0         16         0
##   virginica       0          1        13

Comparison of results

# TRUE where both classifiers predict the same species for a test row
equalPredictions <- plda == pnb
# Colour each test observation by whether the two models agree
qplot(Petal.Width, Sepal.Width, colour = equalPredictions, data = testing)


Regularized regression

Basic idea

  1. Fit a regression model
  2. Penalize (or shrink) large coefficients

Pros:

  • Can help with the bias/variance tradeoff
  • Can help with model selection

Cons:

  • May be computationally demanding on large data sets
  • Does not perform as well as random forests and boosting

A motivating example

\[Y = \beta_0 + \beta_1 X_1 + \beta_2 X_2 + \epsilon\]

where \(X_1\) and \(X_2\) are nearly perfectly correlated (co-linear). You can approximate this model by:

\[Y = \beta_0 + (\beta_1 + \beta_2)X_1 + \epsilon\]

The result is:

  • You will get a good estimate of \(Y\)
  • The estimate (of \(Y\)) will be biased
  • We may reduce variance in the estimate

Prostate cancer

# The prostate data set ships with the ElemStatLearn package
library(ElemStatLearn)
data(prostate)
# Compact overview of column types and the first few values
str(prostate)
## 'data.frame':    97 obs. of  10 variables:
##  $ lcavol : num  -0.58 -0.994 -0.511 -1.204 0.751 ...
##  $ lweight: num  2.77 3.32 2.69 3.28 3.43 ...
##  $ age    : int  50 58 74 58 62 50 64 58 47 63 ...
##  $ lbph   : num  -1.39 -1.39 -1.39 -1.39 -1.39 ...
##  $ svi    : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ lcp    : num  -1.39 -1.39 -1.39 -1.39 -1.39 ...
##  $ gleason: int  6 6 7 6 6 6 6 6 6 6 ...
##  $ pgg45  : int  0 0 20 0 0 0 0 0 0 0 ...
##  $ lpsa   : num  -0.431 -0.163 -0.163 -0.163 0.372 ...
##  $ train  : logi  TRUE TRUE TRUE TRUE TRUE TRUE ...

Subset selection

Code here


Model selection approach: split samples

  • No method better when data/computation time permits it

  • Approach
  1. Divide data into training/test/validation
  2. Treat validation as test data, train all competing models on the train data and pick the best one on validation.
  3. To appropriately assess performance on new data apply to test set
  4. You may re-split and reperform steps 1-3
  • Two common problems
  • Limited data
  • Computational complexity

http://www.biostat.jhsph.edu/~ririzarr/Teaching/649/ http://www.cbcb.umd.edu/~hcorrada/PracticalML/


Decomposing expected prediction error

Assume \(Y_i = f(X_i) + \epsilon_i\)

\(EPE(\lambda) = E\left[\{Y - \hat{f}_{\lambda}(X)\}^2\right]\)

Suppose \(\hat{f}_{\lambda}\) is the estimate from the training data and look at a new data point \(X = x^*\)

\[E\left[\{Y - \hat{f}_{\lambda}(x^*)\}^2\right] = \sigma^2 + \{E[\hat{f}_{\lambda}(x^*)] - f(x^*)\}^2 + var[\hat{f}_\lambda(x^*)]\]

= Irreducible error + Bias\(^2\) + Variance

http://www.biostat.jhsph.edu/~ririzarr/Teaching/649/ http://www.cbcb.umd.edu/~hcorrada/PracticalML/


Another issue for high-dimensional data

# Keep only the first five rows so there are more predictors than observations
small <- prostate[1:5, ]
# Regress lpsa on every other column; coefficients that cannot be estimated print as NA
lm(lpsa ~ ., data = small)
## 
## Call:
## lm(formula = lpsa ~ ., data = small)
## 
## Coefficients:
## (Intercept)       lcavol      lweight          age         lbph  
##     9.60615      0.13901     -0.79142      0.09516           NA  
##         svi          lcp      gleason        pgg45    trainTRUE  
##          NA           NA     -2.08710           NA           NA

http://www.biostat.jhsph.edu/~ririzarr/Teaching/649/ http://www.cbcb.umd.edu/~hcorrada/PracticalML/


Hard thresholding

  • Model \(Y = f(X) + \epsilon\)

  • Set \(\hat{f}_{\lambda}(x) = x'\beta\)

  • Constrain only \(\lambda\) coefficients to be nonzero.

  • The selection problem is: after choosing \(\lambda\), figure out which \(\lambda\) coefficients to make nonzero (equivalently, which \(p - \lambda\) coefficients to set to zero)

http://www.biostat.jhsph.edu/~ririzarr/Teaching/649/ http://www.cbcb.umd.edu/~hcorrada/PracticalML/


Regularization for regression

If the \(\beta_j\)’s are unconstrained: * They can explode * And hence are susceptible to very high variance

To control variance, we might regularize/shrink the coefficients.

\[ PRSS(\beta) = \sum_{j=1}^n (Y_j - \sum_{i=1}^m \beta_{i} X_{ij})^2 + P(\lambda; \beta)\]

where \(PRSS\) is a penalized form of the sum of squares. Things that are commonly looked for

  • Penalty reduces complexity
  • Penalty reduces variance
  • Penalty respects structure of the problem

Ridge regression

Solve:

\[ \sum_{i=1}^N \left(y_i - \beta_0 - \sum_{j=1}^p x_{ij}\beta_j \right)^2 + \lambda \sum_{j=1}^p \beta_j^2\]

equivalent to solving

\(\sum_{i=1}^N \left(y_i - \beta_0 - \sum_{j=1}^p x_{ij}\beta_j \right)^2\) subject to \(\sum_{j=1}^p \beta_j^2 \leq s\) where \(s\) is inversely related to \(\lambda\)

Inclusion of \(\lambda\) makes the problem non-singular even if \(X^TX\) is not invertible.

http://www.biostat.jhsph.edu/~ririzarr/Teaching/649/ http://www.cbcb.umd.edu/~hcorrada/PracticalML/


Tuning parameter \(\lambda\)

  • \(\lambda\) controls the size of the coefficients
  • \(\lambda\) controls the amount of regularization
  • As \(\lambda \rightarrow 0\) we obtain the least square solution
  • As \(\lambda \rightarrow \infty\) we have \(\hat{\beta}_{\lambda=\infty}^{ridge} = 0\)

Lasso

\(\sum_{i=1}^N \left(y_i - \beta_0 - \sum_{j=1}^p x_{ij}\beta_j \right)^2\) subject to \(\sum_{j=1}^p |\beta_j| \leq s\)

also has a lagrangian form

\[ \sum_{i=1}^N \left(y_i - \beta_0 - \sum_{j=1}^p x_{ij}\beta_j \right)^2 + \lambda \sum_{j=1}^p |\beta_j|\]

For orthonormal design matrices (not the norm!) this has a closed form solution

\[\hat{\beta}_j = sign(\hat{\beta}_j^0)(|\hat{\beta}_j^0| - \gamma)^{+}\]

but not in general.

http://www.biostat.jhsph.edu/~ririzarr/Teaching/649/ http://www.cbcb.umd.edu/~hcorrada/PracticalML/


Combining predictors

Key ideas

  • You can combine classifiers by averaging/voting
  • Combining classifiers improves accuracy
  • Combining classifiers reduces interpretability
  • Boosting, bagging, and random forests are variants on this theme

Basic intuition - majority vote

Suppose we have 5 completely independent classifiers

If accuracy is 70% for each: * \(10\times(0.7)^3(0.3)^2 + 5\times(0.7)^4(0.3) + (0.7)^5\) * 83.7% majority vote accuracy

With 101 independent classifiers * 99.9% majority vote accuracy


Approaches for combining classifiers

  1. Bagging, boosting, random forests
  • Usually combine similar classifiers
  1. Combining different classifiers
  • Model stacking
  • Model ensembling

Example with Wage data

Create training, test and validation sets

library(ISLR)
data(Wage)
library(ggplot2)
library(caret)
# Drop logwage so it cannot be used as a predictor of wage
Wage <- subset(Wage, select = -logwage)

# First split: 70% of rows go to the building set, the rest to validation
inBuild <- createDataPartition(y = Wage$wage, p = 0.7, list = FALSE)
validation <- Wage[-inBuild, ]
buildData <- Wage[inBuild, ]

# Second split: the building set is divided 70/30 into training and testing
inTrain <- createDataPartition(y = buildData$wage, p = 0.7, list = FALSE)
training <- buildData[inTrain, ]
testing <- buildData[-inTrain, ]

Wage data sets

Create training, test and validation sets

# Rows and columns of each of the three partitions created above
dim(training)
## [1] 1474   11
dim(testing)
## [1] 628  11
dim(validation)
## [1] 898  11

Build two different models

# Model 1: generalized linear model on all predictors
mod1 <- train(wage ~ ., method = "glm", data = training)
# Model 2: random forest resampled with 3-fold cross-validation.
# `number` is an argument of trainControl(); it has to be set there for the
# fold count to take effect (it was previously passed to train() itself).
mod2 <- train(wage ~ ., method = "rf",
              data = training,
              trControl = trainControl(method = "cv", number = 3))

Predict on the testing set

# Predictions from both base models on the testing partition
pred1 <- predict(mod1, testing)
pred2 <- predict(mod2, testing)
# Plot one model's predictions against the other's, coloured by the observed wage
qplot(pred1, pred2, colour = wage, data = testing)


Fit a model that combines predictors

# Stack the two sets of predictions next to the observed wage
predDF <- data.frame(pred1 = pred1, pred2 = pred2, wage = testing$wage)
# Fit a GAM that maps the two base predictions to wage
combModFit <- train(wage ~ ., method = "gam", data = predDF)
# Combined prediction on the same rows used to fit the combiner
combPred <- predict(combModFit, predDF)

Testing errors

# Square root of the summed squared error on the testing partition.
# This is a sum rather than a mean, so compare values only on the same set.
sqrt(sum((pred1 - testing$wage)^2))
## [1] 819.2359
sqrt(sum((pred2 - testing$wage)^2))
## [1] 839.2307
sqrt(sum((combPred - testing$wage)^2))
## [1] 795.2577

Predict on validation data set

# Base-model predictions on the validation partition
pred1V <- predict(mod1, validation)
pred2V <- predict(mod2, validation)
# Column names must be pred1/pred2, the names the combiner was fitted on
predVDF <- data.frame(pred1 = pred1V, pred2 = pred2V)
combPredV <- predict(combModFit, predVDF)

Evaluate on validation

# Same error measure as on the testing set, now on the validation partition
sqrt(sum((pred1V - validation$wage)^2))
## [1] 1090.906
sqrt(sum((pred2V - validation$wage)^2))
## [1] 1114.767
sqrt(sum((combPredV - validation$wage)^2))
## [1] 1090.317

Notes and further resources

  • Even simple blending can be useful
  • Typical model for binary/multiclass data
  • Build an odd number of models
  • Predict with each model
  • Predict the class by majority vote
  • This can get dramatically more complicated
  • Simple blending in caret: caretEnsemble (use at your own risk!)
  • Wikipedia ensemble learning

Unsupervised prediction

Key ideas

  • Sometimes you don’t know the labels for prediction
  • To build a predictor
  • Create clusters
  • Name clusters
  • Build predictor for clusters
  • In a new data set
  • Predict clusters

Iris example ignoring species labels

data(iris)
library(ggplot2)
library(caret)
# 70% of the rows go to training, the remainder to testing
inTrain <- createDataPartition(y = iris$Species, p = 0.7, list = FALSE)
training <- iris[inTrain, ]
testing <- iris[-inTrain, ]
# Size of each partition
dim(training)
dim(testing)
## [1] 105   5
## [1] 45  5

Cluster with k-means

# Cluster on every column except Species, asking for three clusters
kMeans1 <- kmeans(subset(training, select = -Species), centers = 3)
# Store the assigned cluster as a factor so it can serve as a class label
training$clusters <- as.factor(kMeans1$cluster)
qplot(Petal.Width, Petal.Length, colour = clusters, data = training)


Compare to real labels

# Cross-tabulate assigned cluster (rows) against the true species (columns)
table(kMeans1$cluster,training$Species)
##    
##     setosa versicolor virginica
##   1      0          4        32
##   2     35          0         0
##   3      0         31         3

Build predictor

# Fit an rpart tree that predicts the cluster label; Species is removed
# from the data so the true labels are not available to the model
modFit <- train(clusters ~ ., data = subset(training, select = -Species),
                method = "rpart")
# Predicted cluster (rows) against true species (columns) on the training rows
table(predict(modFit, training), training$Species)
##    
##     setosa versicolor virginica
##   1      0          6        34
##   2     35          0         0
##   3      0         29         1

Apply on test

# Assign each held-out flower to a cluster using the fitted tree
testClusterPred <- predict(modFit, testing)
# Predicted cluster against true species on the test rows
table(testClusterPred, testing$Species)
##                
## testClusterPred setosa versicolor virginica
##               1      0          5        15
##               2     15          0         0
##               3      0         10         0

Notes and further reading


Forecasting

What is different?

  • Data are dependent over time
  • Specific pattern types
  • Trends - long term increase or decrease
  • Seasonal patterns - patterns related to time of week, month, year, etc.
  • Cycles - patterns that rise and fall periodically
  • Subsampling into training/test is more complicated
  • Similar issues arise in spatial data
  • Dependency between nearby observations
  • Location specific effects
  • Typically goal is to predict one or more observations into the future.
  • All standard predictions can be used (with caution!)

Also common in geographic analyses

http://xkcd.com/1138/


Google data

library(quantmod); library(forecast)
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: TTR
## Version 0.4-0 included new data defaults. See ?getSymbols.
## 
## Attaching package: 'quantmod'
## The following object is masked from 'package:Hmisc':
## 
##     Lag
## Loading required package: timeDate
## This is forecast 7.0
## 
## Attaching package: 'forecast'
## The following object is masked from 'package:nlme':
## 
##     getResponse
# Date range for the price history (two-digit years parsed with %y)
from.dat <- as.Date("01/01/08", format = "%m/%d/%y")
to.dat <- as.Date("12/31/13", format = "%m/%d/%y")
# quantmod's "google" source is defunct (Google Finance stopped serving
# data), so the series is fetched from Yahoo instead. The captured output
# shown below came from the old Google source; Yahoo also returns an
# AAPL.Adjusted column and its price levels may differ.
getSymbols("AAPL", src = "yahoo", from = from.dat, to = to.dat)
##     As of 0.4-0, 'getSymbols' uses env=parent.frame() and
##  auto.assign=TRUE by default.
## 
##  This  behavior  will be  phased out in 0.5-0  when the call  will
##  default to use auto.assign=FALSE. getOption("getSymbols.env") and 
##  getOptions("getSymbols.auto.assign") are now checked for alternate defaults
## 
##  This message is shown once per session and may be disabled by setting 
##  options("getSymbols.warning4.0"=FALSE). See ?getSymbols for more details.
## Warning in download.file(paste(google.URL, "q=", Symbols.name,
## "&startdate=", : downloaded length 65602 != reported length 200
## [1] "AAPL"
# First rows of the object that getSymbols() assigned to AAPL
head(AAPL)
##            AAPL.Open AAPL.High AAPL.Low AAPL.Close AAPL.Volume
## 2008-01-02     28.47     28.61    27.51      27.83   269794140
## 2008-01-03     27.92     28.20    27.53      27.85   210516460
## 2008-01-04     27.35     27.57    25.56      25.72   363888854
## 2008-01-07     25.89     26.23    24.32      25.38   518047922
## 2008-01-08     25.73     26.07    24.40      24.46   380953888
## 2008-01-09     24.50     25.60    24.00      25.60   453884711

Summarize monthly and store as time series

library(xts); library(quantmod)
# Collapse the daily AAPL bars into monthly bars
mAAPL <- to.monthly(AAPL)
# Monthly opening price (variable name kept as-is; the data are AAPL)
googOpen <- Op(mAAPL)
# Monthly series with frequency 12; ts() starts at 1 by default, hence the
# "Years+1" x-axis label
ts1 <- ts(googOpen, frequency = 12)
# Label the y-axis with the ticker actually plotted
plot(ts1, xlab = "Years+1", ylab = "AAPL")


Example time series decomposition

  • Trend - Consistently increasing pattern over time
  • Seasonal - When there is a pattern over a fixed period of time that recurs.
  • Cyclic - When data rises and falls over non fixed periods

https://www.otexts.org/fpp/6/1


Decompose a time series into parts

# Break the monthly series into its components and plot them
plot(decompose(ts1), xlab = "Years+1")


Training and test sets

# Train on years 1-4 and test on years 5-6. The training window stops just
# short of t = 5 so that the January-of-year-5 observation, which opens the
# test window, is not also used for fitting (the same "- 0.01" idiom is used
# for the end of the test window).
ts1Train <- window(ts1, start = 1, end = 5 - 0.01)
ts1Test <- window(ts1, start = 5, end = (7 - 0.01))
## Warning in window.default(x, ...): 'end' value not changed
ts1Train
##     Jan   Feb   Mar   Apr   May   Jun   Jul   Aug   Sep   Oct   Nov   Dec
## 1 28.47 19.46 17.78 20.90 24.99 26.94 23.46 22.84 24.63 15.99 15.13 13.04
## 2 12.27 12.73 12.59 14.87 17.97 19.50 20.50 23.60 24.00 26.48 27.11 28.89
## 3 30.49 27.48 29.39 33.57 37.69 37.10 36.33 37.21 35.35 40.88 43.17 45.04
## 4 46.52 48.76 50.78 50.16 49.96 49.84 47.99 56.83 55.12 54.34 56.77 54.65

Simple moving average

\[ Y_{t}=\frac{1}{2*k+1}\sum_{j=-k}^k {y_{t+j}}\]

library(forecast)
# Plot the training series, then overlay its order-3 moving average in red
plot(ts1Train)
lines(ma(ts1Train, order = 3), col = "red")


Exponential smoothing

Example - simple exponential smoothing \[\hat{y}_{t+1} = \alpha y_t + (1-\alpha)\hat{y}_{t}\]

https://www.otexts.org/fpp/7/6


Exponential smoothing

# Exponential smoothing state-space model with the "MMM" specification
ets1 <- ets(ts1Train, model = "MMM")
fcast <- forecast(ets1)
# Plot the forecast, then overlay the held-out observations in red
plot(fcast)
lines(ts1Test, col = "red")


Get the accuracy

# Error measures of the forecast on the training set and against the held-out test window
accuracy(fcast,ts1Test)
##                     ME      RMSE       MAE        MPE      MAPE      MASE
## Training set -0.230240  3.109365  2.251173  -1.206482  8.100539 0.1719304
## Test set     -5.948349 23.957857 21.466624 -11.485072 29.873756 1.6394854
##                    ACF1 Theil's U
## Training set 0.02001109        NA
## Test set     0.92810962  4.302183

Notes and further resources