# Read the lottery data; the first line of the file holds the column names.
lottery <- read.table("lottery.txt", header = TRUE)

# Structure, first rows and per-column summary of the data frame.
str(lottery)
## 'data.frame': 254 obs. of 2 variables:
## $ lottery.number: int 810 156 140 542 507 972 431 981 865 499 ...
## $ lottery.payoff: num 190 120 286 184 384 ...
head(lottery)
## lottery.number lottery.payoff
## 1 810 190.0
## 2 156 120.5
## 3 140 285.5
## 4 542 184.0
## 5 507 384.5
## 6 972 324.5
summary(lottery)
## lottery.number lottery.payoff
## Min. : 0.0 Min. : 83.0
## 1st Qu.:230.0 1st Qu.:194.2
## Median :440.5 Median :270.2
## Mean :472.2 Mean :290.4
## 3rd Qu.:734.5 3rd Qu.:364.0
## Max. :999.0 Max. :869.5

# Standard deviation of each column. vapply() walks the columns directly
# (no coercion of the data frame to a matrix) and guarantees one number per
# column; the result is the same named numeric vector.
vapply(lottery, sd, numeric(1))
## lottery.number lottery.payoff
## 294.4773 128.8884

# Stem-and-leaf display of the drawn numbers, stretched with scale = 5.
stem(lottery$lottery.number, scale = 5)
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 01788
## 1 | 11568
## 2 | 006
## 3 | 4
## 4 | 27
## 5 |
## 6 | 79
## 7 | 2779
## 8 | 79
## 9 | 29
## 10 | 5679
## 11 | 012247
## 12 | 23
## 13 | 36
## 14 | 0
## 15 | 688
## 16 | 07
## 17 | 048
## 18 | 0257
## 19 | 2789
## 20 | 09
## 21 | 49
## 22 | 36
## 23 | 0015689
## 24 | 355
## 25 | 3345778
## 26 | 78
## 27 | 45
## 28 | 26
## 29 | 349
## 30 | 0059
## 31 | 00449
## 32 |
## 33 | 357
## 34 | 68
## 35 | 6778
## 36 |
## 37 | 4
## 38 | 03
## 39 | 156
## 40 | 236
## 41 | 0136
## 42 | 4
## 43 | 01444
## 44 | 016
## 45 |
## 46 | 78
## 47 | 246799
## 48 | 05
## 49 | 699
## 50 | 778
## 51 | 5568
## 52 | 478
## 53 | 79
## 54 | 112
## 55 | 359
## 56 |
## 57 |
## 58 | 02
## 59 | 7
## 60 | 24
## 61 | 56
## 62 | 3
## 63 |
## 64 | 068
## 65 | 239
## 66 | 112
## 67 | 7
## 68 | 349
## 69 | 13458
## 70 | 1
## 71 | 14
## 72 |
## 73 | 35
## 74 | 244
## 75 | 01
## 76 | 11477
## 77 | 19
## 78 | 111
## 79 |
## 80 | 889
## 81 | 02
## 82 | 78
## 83 |
## 84 | 129
## 85 | 448
## 86 | 335
## 87 | 9
## 88 | 4
## 89 | 346
## 90 | 6
## 91 | 33899
## 92 | 18
## 93 | 57
## 94 | 157
## 95 | 4
## 96 | 0344
## 97 | 258
## 98 | 1177
## 99 | 69
# Histogram of the drawn numbers with hist()'s default settings; the returned
# object is kept so that its components can be inspected.
h10 <- hist(x = lottery$lottery.number)
# Show every component of the histogram object.
print(h10)
## $breaks
## [1] 0 100 200 300 400 500 600 700 800 900 1000
##
## $counts
## [1] 26 32 33 22 29 21 23 20 21 27
##
## $density
## [1] 0.0010236220 0.0012598425 0.0012992126 0.0008661417 0.0011417323
## [6] 0.0008267717 0.0009055118 0.0007874016 0.0008267717 0.0010629921
##
## $mids
## [1] 50 150 250 350 450 550 650 750 850 950
##
## $xname
## [1] "lottery$lottery.number"
##
## $equidist
## [1] TRUE
##
## attr(,"class")
## [1] "histogram"
# NOTE: attach() puts the columns of lottery on the search path; the calls
# below (and further down the script) refer to lottery.number and
# lottery.payoff by bare name and rely on this.
attach(lottery)
par(mfrow = c(1, 2))

# Two histograms over the same breaks (0, 100, ..., 1000):
#   h10.2 explicitly includes the lowest value,
#   h10.3 passes right = FALSE.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.
h10.2 <- hist(lottery.number, breaks = seq(0, 1000, by = 100),
              include.lowest = TRUE)
h10.3 <- hist(lottery.number, breaks = seq(0, 1000, by = 100),
              right = FALSE)

# Compare breaks, counts and densities of the two variants.
list(breaks = h10.2$breaks, counts = h10.2$counts, density = h10.2$density)
## $breaks
## [1] 0 100 200 300 400 500 600 700 800 900 1000
##
## $counts
## [1] 26 32 33 22 29 21 23 20 21 27
##
## $density
## [1] 0.0010236220 0.0012598425 0.0012992126 0.0008661417 0.0011417323
## [6] 0.0008267717 0.0009055118 0.0007874016 0.0008267717 0.0010629921
list(breaks = h10.3$breaks, counts = h10.3$counts, density = h10.3$density)
## $breaks
## [1] 0 100 200 300 400 500 600 700 800 900 1000
##
## $counts
## [1] 26 31 32 24 29 21 23 20 21 27
##
## $density
## [1] 0.0010236220 0.0012204724 0.0012598425 0.0009448819 0.0011417323
## [6] 0.0008267717 0.0009055118 0.0007874016 0.0008267717 0.0010629921
# Back to a single plot per page.
par(mfrow = c(1, 1))

# Ask for ten classes through nclass rather than giving explicit breaks.
h10.4 <- hist(lottery.number, nclass = 10)

# Pull out the same three components as a plain named list.
unclass(h10.4)[c("breaks", "counts", "density")]
## $breaks
## [1] 0 100 200 300 400 500 600 700 800 900 1000
##
## $counts
## [1] 26 32 33 22 29 21 23 20 21 27
##
## $density
## [1] 0.0010236220 0.0012598425 0.0012992126 0.0008661417 0.0011417323
## [6] 0.0008267717 0.0009055118 0.0007874016 0.0008267717 0.0010629921
# Remember the current graphical parameters so they can be restored later.
opar <- par(no.readonly = TRUE)

# 3 x 3 grid: one histogram for each requested class count, 10 down to 2.
par(mfrow = c(3, 3))

# The subtitle is built from the same value that is passed to nclass, so the
# label cannot drift away from the setting (the nclass = 9 panel used to be
# mislabelled "ncalss=9"). lapply() with an anonymous function keeps the
# counter out of the workspace; invisible() suppresses the returned list.
invisible(lapply(10:2, function(k) {
  hist(lottery.number, nclass = k, sub = paste0("nclass=", k))
}))
par(mfrow = c(1, 2))

# Compute (without drawing) the histograms requested with nclass = 4 and
# nclass = 3, to see which breaks were actually used. FALSE is spelled out
# because F is an ordinary variable that can be reassigned.
h4 <- hist(lottery.number, nclass = 4, plot = FALSE)
h3 <- hist(lottery.number, nclass = 3, plot = FALSE)

# nclass = 4 ends up with five classes of width 200 ...
list(breaks = h4$breaks, counts = h4$counts, density = h4$density)
## $breaks
## [1] 0 200 400 600 800 1000
##
## $counts
## [1] 58 55 50 43 48
##
## $density
## [1] 0.0011417323 0.0010826772 0.0009842520 0.0008464567 0.0009448819
# ... and nclass = 3 with two classes of width 500.
list(breaks = h3$breaks, counts = h3$counts, density = h3$density)
## $breaks
## [1] 0 500 1000
##
## $counts
## [1] 142 112
##
## $density
## [1] 0.0011181102 0.0008818898
# Two panels side by side.
par(mfrow = c(1, 2))

# Force exactly four and exactly three classes by supplying the break points.
h4.breaks <- hist(lottery.number, sub = "(with Breaks)",
                  breaks = seq(0, 1000, by = 250))
h3.breaks <- hist(lottery.number, sub = "(with Breaks)",
                  breaks = seq(0, 999, by = 333))

# Breaks, counts and densities of the four-class histogram.
unclass(h4.breaks)[c("breaks", "counts", "density")]
## $breaks
## [1] 0 250 500 750 1000
##
## $counts
## [1] 73 69 53 59
##
## $density
## [1] 0.0011496063 0.0010866142 0.0008346457 0.0009291339
# Breaks, counts and densities of the three-class histogram.
unclass(h3.breaks)[c("breaks", "counts", "density")]
## $breaks
## [1] 0 333 666 999
##
## $counts
## [1] 99 78 77
##
## $density
## [1] 0.0011704618 0.0009221820 0.0009103592
# The graphical parameters are deliberately NOT saved again here: opar already
# holds the state captured before the first 3 x 3 grid, and re-saving at this
# point would overwrite it with the 1 x 2 layout that is currently active, so
# the final par(opar) would restore the wrong layout.
par(mfrow = c(3, 3))

# Same 3 x 3 grid as before, but with explicit break points giving exactly
# 10, 9, ..., 2 classes (the upper limit is the class width times the count).
hist(lottery.number, breaks = seq(0, 1000, by = 100), sub = "(with Breaks)")
hist(lottery.number, breaks = seq(0, 999, by = 111), sub = "(with Breaks)")
hist(lottery.number, breaks = seq(0, 1000, by = 125), sub = "(with Breaks)")
hist(lottery.number, breaks = seq(0, 1001, by = 143), sub = "(with Breaks)")
hist(lottery.number, breaks = seq(0, 1002, by = 167), sub = "(with Breaks)")
hist(lottery.number, breaks = seq(0, 1000, by = 200), sub = "(with Breaks)")
hist(lottery.number, breaks = seq(0, 1000, by = 250), sub = "(with Breaks)")
hist(lottery.number, breaks = seq(0, 999, by = 333), sub = "(with Breaks)")
hist(lottery.number, breaks = seq(0, 1000, by = 500), sub = "(with Breaks)")
# Single panel: four-class histogram drawn without the automatic annotation
# (ann = FALSE; spelled out because F can be reassigned), then titled by hand.
par(mfrow = c(1, 1))
hist(lottery.number, breaks = seq(0, 1000, by = 250), ann = FALSE)
title(main = "Histogram of Numbers Drawn", sub = "(with Breaks Argument)",
      xlab = "Number", ylab = "Frequency")
# Default histogram of the payoffs.
hist(lottery.payoff)

# Location measures to be marked on the next plot.
mean.payoff <- mean(lottery.payoff)
med.payoff <- median(lottery.payoff)

# Redraw without the automatic axes (FALSE spelled out because F can be
# reassigned). The y-range starts at -10 to leave room below the bars for the
# arrow that marks the mean.
hist(lottery.payoff, axes = FALSE, ylim = c(-10, 100))
axis(side = 1, at = seq(0, 1000, by = 100),
     labels = paste(seq(0, 1000, by = 100)))
# Arrow from y = -10 up to y = 0 at the mean; dashed red line at the median.
arrows(x0 = mean.payoff, y0 = -10, x1 = mean.payoff, y1 = 0,
       length = 0.1, code = 2)
abline(v = med.payoff, lty = 2, col = "red")
axis(side = 2, at = seq(0, 100, by = 20),
     labels = paste(seq(0, 100, by = 20)))
# Show the first observation.
lottery[1, ]
# Since that observation is (810, 190), draw the point on the plane and use
# text() to label its coordinates. (This explanation used to be bare prose
# fused onto the plot() call, which made the script unparseable.)
plot(lottery[1, ], xlim = c(0, 1000), ylim = c(0, 1000), axes = FALSE,
     xlab = "Number", ylab = "Payoff")
# Custom axes with ticks at the coordinates of the point.
axis(side = 1, at = c(0, 500, 810, 1000), labels = c(0, 500, 810, 1000))
axis(side = 2, at = c(0, 190, 500, 1000), labels = c(0, 190, 500, 1000))
text(lottery[1, ], labels = c("(810, 190)"), pos = 1)
# Arrows from the point down to the x-axis and across to the y-axis.
arrows(x0 = 810, y0 = 190, x1 = 810, y1 = 0, code = 2, length = 0.2)
arrows(x0 = 810, y0 = 190, x1 = 0, y1 = 190, code = 2, length = 0.2)
# Row positions of the draws whose number is 0, 499 or 999, each followed by
# the matching rows of the data frame.
id.0 <- which(lottery[["lottery.number"]] == 0)
lottery[id.0, ]
## lottery.number lottery.payoff
## 99 0 96
id.499 <- which(lottery[["lottery.number"]] == 499)
lottery[id.499, ]
## lottery.number lottery.payoff
## 10 499 869.5
## 132 499 247.5
id.999 <- which(lottery[["lottery.number"]] == 999)
lottery[id.999, ]
## lottery.number lottery.payoff
## 168 999 239
# Rebuild the single-point plot for the first observation (810, 190) with
# hand-made axes; FALSE is spelled out because F can be reassigned.
plot(lottery[1, ], xlim = c(0, 1000), ylim = c(0, 1000), axes = FALSE,
     xlab = "Number", ylab = "Payoff")
axis(side = 1, at = c(0, 500, 810, 1000), labels = c(0, 500, 810, 1000))
axis(side = 2, at = c(0, 190, 500, 1000), labels = c(0, 190, 500, 1000))
text(lottery[1, ], labels = c("(810, 190)"), pos = 1)
arrows(x0 = 810, y0 = 190, x1 = 810, y1 = 0, code = 2, length = 0.2)
arrows(x0 = 810, y0 = 190, x1 = 0, y1 = 190, code = 2, length = 0.2)

# Add the draws with numbers 0, 499 (two rows) and 999, and label each point
# with its coordinates: to the right for 0, to the left for the others.
points(lottery[c(id.0, id.499, id.999), ])
text(lottery[id.0, ], labels = "(0, 96)", pos = 4)
text(lottery[c(id.499, id.999), ],
     labels = c("(499, 869.5)", "(499, 247.5)", "(999, 239)"), pos = 2)
# Scatter plot of payoff against the drawn number: once with the default
# symbol, once with solid dots plus a dashed reference line at a payoff of 600.
plot(x = lottery.number, y = lottery.payoff)
plot(x = lottery.number, y = lottery.payoff, pch = 20)
abline(lty = 2, h = 600)

# Interactive labelling of up to five points with their drawn number; nothing
# is selected when no clicks are made.
identify(x = lottery.number, y = lottery.payoff,
         labels = as.character(lottery.number), n = 5)
## integer(0)

# Non-interactive alternative: positions of the payoffs of at least 600 and
# the numbers drawn on those occasions.
high.payoff <- which(lottery.payoff >= 600)
high.payoff
## [1] 10 11 95 107 215
lottery.number[high.payoff]
## [1] 499 20 77 767 919
# Scatter plot with the high-payoff points labelled by their drawn number,
# placed to the right of each point.
plot(x = lottery.number, y = lottery.payoff, pch = 20)
abline(lty = 2, h = 600)
text(labels = lottery.number[high.payoff],
     x = lottery.number[high.payoff],
     y = lottery.payoff[high.payoff],
     pos = 4)

# Row order from the largest payoff to the smallest, then the ten largest
# payoffs and the numbers that produced them.
o.payoff <- order(lottery.payoff, decreasing = TRUE)
lottery.payoff[o.payoff[1:10]]
## [1] 869.5 756.0 668.5 640.0 637.0 567.5 557.5 556.5 553.0 548.5
lottery.number[o.payoff[1:10]]
## [1] 499 767 20 77 919 87 662 698 894 779
# Scatter plot with a dashed reference grid: horizontal lines every 250
# dollars of payoff, vertical lines every 100 numbers.
plot(lottery.number, lottery.payoff, pch = 20, ylim = c(0, 1000))
abline(h = seq(0, 1000, by = 250), lty = 2)
abline(v = seq(0, 1000, by = 100), lty = 2)

# Same plot and grid, now with fitted curves on top. The least-squares line
# is fitted and drawn once, after the grid (it used to be computed and drawn
# twice), and the component is named in full instead of relying on partial
# matching of `$coef`.
plot(lottery.number, lottery.payoff, pch = 20, ylim = c(0, 1000))
abline(h = seq(0, 1000, by = 250), lty = 2)
abline(v = seq(0, 1000, by = 100), lty = 2)
abline(lsfit(lottery.number, lottery.payoff)$coefficients)
# Lowess smooths with two different smoother spans.
lines(lowess(lottery.number, lottery.payoff, f = 1 / 3), col = "blue")
lines(lowess(lottery.number, lottery.payoff, f = 2 / 3), col = "red")
legend(x = 0, y = 1000, lty = 1, col = c("black", "blue", "red"),
       legend = c("lsfit", "lowess, f=1/3", "lowess, f=2/3"))
# Five-number summary of each column. vapply() works on the columns of the
# data frame directly (no coercion to a matrix) and checks that every column
# yields exactly five numbers; the result is the same 5 x 2 matrix.
vapply(lottery, fivenum, numeric(5))
## lottery.number lottery.payoff
## [1,] 0.0 83.00
## [2,] 230.0 194.00
## [3,] 440.5 270.25
## [4,] 735.0 365.00
## [5,] 999.0 869.50

# Box plots of the two variables side by side.
par(mfrow = c(1, 2))
boxplot(lottery.number, main = "Numbers Drawn")
boxplot(lottery.payoff, main = "Payoff in Dollars")
# Work on a copy of the data that will receive the class variables.
lottery.fac <- lottery

# Bin the drawn numbers into ten left-closed classes. include.lowest = TRUE
# closes the last class on the right as well, giving [900,999]; without it
# the last class is [900,999) and the draw with number 999 becomes NA and is
# dropped from the box plots below.
lottery.fac$classes.10 <- cut(lottery.fac$lottery.number,
                              breaks = c(seq(0, 900, by = 100), 999),
                              right = FALSE, include.lowest = TRUE)
head(lottery.fac)
## lottery.number lottery.payoff classes.10
## 1 810 190.0 [800,900)
## 2 156 120.5 [100,200)
## 3 140 285.5 [100,200)
## 4 542 184.0 [500,600)
## 5 507 384.5 [500,600)
## 6 972 324.5 [900,999]

# Swap the attached data: drop lottery (named explicitly rather than relying
# on it sitting at position 2 of the search path) and attach the copy.
detach(lottery)
attach(lottery.fac)

# Payoff distribution within each class of drawn numbers.
boxplot(lottery.payoff ~ classes.10, data = lottery.fac)

# Shorter labels 0-9 for the ten classes, taken from the data frame itself
# rather than from the attached copy.
lottery.fac$classes <- factor(lottery.fac$classes.10, labels = 0:9)
head(lottery.fac)
## lottery.number lottery.payoff classes.10 classes
## 1 810 190.0 [800,900) 8
## 2 156 120.5 [100,200) 1
## 3 140 285.5 [100,200) 1
## 4 542 184.0 [500,600) 5
## 5 507 384.5 [500,600) 5
## 6 972 324.5 [900,999] 9
boxplot(lottery.payoff ~ classes, data = lottery.fac,
        main = "Payoff by Numbers Drawn")
# Exercise: try plain plot() instead of boxplot() and see what happens. What is the difference?
# Now add the marginal distribution of each variable to the scatter plot: a histogram for the numbers drawn and a boxplot for the payoff.
# Scatter plot in the lower-left 80% x 80% of the figure region.
par(fig = c(0, 0.8, 0, 0.8))
plot(lottery.payoff ~ lottery.number, data = lottery.fac, pch = 20,
     xlab = "Numbers Drawn", ylab = "Payoff in Dollars")

# Histogram of the drawn numbers in the strip above the scatter plot;
# new = TRUE adds to the current page instead of starting a new one.
par(fig = c(0, 0.8, 0.55, 1), new = TRUE)
hist(lottery.number, axes = FALSE, ann = FALSE)

# Box plot of the payoffs in the strip to the right of the scatter plot.
# The former `horiz = TRUE` was removed: the orientation argument of boxplot()
# is `horizontal`, and because it comes after `...` in boxplot.default it
# cannot be abbreviated, so `horiz` never took effect. The box therefore stays
# vertical, which is what lines up with the payoff (y) axis of the scatter plot.
par(fig = c(0.65, 1, 0, 0.8), new = TRUE)
boxplot(lottery.payoff, axes = FALSE)

# Clean up: drop the attached data frame by name and restore the graphical
# parameters saved in opar.
detach(lottery.fac)
par(opar)
# Persist every object currently in the workspace, then the command history
# of this session.
save(list = ls(), file = "lottery.RData")
savehistory(file = "lottery.Rhistory")