프러시아에서 20년간 14개 연대에서 발생한 말발굽에 의한 사망 사고 기록
n.deaths<-0:4
n.camps<-c(144,91,32,11,2)
horsekick<-data.frame(n.deaths=n.deaths, n.camps=n.camps)
horsekick
## n.deaths n.camps
## 1 0 144
## 2 1 91
## 3 2 32
## 4 3 11
## 5 4 2
이 자료를 하나의 긴 벡터로 나타내는 것은 간단히
horsekick.long<-rep(n.deaths, times=n.camps)
str(horsekick.long)
## int [1:280] 0 0 0 0 0 0 0 0 0 0 ...
summary(horsekick.long)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0 0.0 0.0 0.7 1.0 4.0
위 자료를 누적분포로 도식화하기 위한 첫 작업
options(digits=3)
horsekick.p<-n.camps/sum(n.camps)
horsekick.p
## [1] 0.51429 0.32500 0.11429 0.03929 0.00714
horsekick.cum<-round(cumsum(horsekick.p)*100, digits=1)
plot(n.deaths, horsekick.cum, xlim=c(-0.5,4.5), ylim=c(50,100), xlab="사망자 수", ylab="누적 백분률(%)", yaxt="n")
axis(side=2,at=horsekick.cum, labels=horsekick.cum)
누적 분포를 알기 쉽도록 격자 설정
options(digits=3)
plot(n.deaths, horsekick.cum, xlim=c(-0.5,4.5), ylim=c(50,100), xlab="사망자 수", ylab="누적 백분률(%)", yaxt="n")
axis(side=2, at=horsekick.cum, labels=horsekick.cum)
abline(v=0:4, lty=2)
lines(c(0,1), rep(horsekick.cum[1], 2), lty=2)
lines(c(0,2), rep(horsekick.cum[2], 2), lty=2)
lines(c(0,3), rep(horsekick.cum[3], 2), lty=2)
lines(c(0,4), rep(horsekick.cum[4], 2), lty=2)
lines(c(0,4.5), rep(horsekick.cum[5], 2), lty=2)
lines()
는 아래와 같이 apply()
를 이용하여 작업을 다소 줄일 수도 있다. 우선, 좌표를 정리한다.
h.x<-rbind(rep(0, 5), c(1:4, 4.5))
h.x
## [,1] [,2] [,3] [,4] [,5]
## [1,] 0 0 0 0 0.0
## [2,] 1 2 3 4 4.5
h.y<-sapply(horsekick.cum, rep, times=2)
h.y
## [,1] [,2] [,3] [,4] [,5]
## [1,] 51.4 83.9 95.4 99.3 100
## [2,] 51.4 83.9 95.4 99.3 100
h.xy<-rbind(h.x, h.y)
h.xy
## [,1] [,2] [,3] [,4] [,5]
## [1,] 0.0 0.0 0.0 0.0 0.0
## [2,] 1.0 2.0 3.0 4.0 4.5
## [3,] 51.4 83.9 95.4 99.3 100.0
## [4,] 51.4 83.9 95.4 99.3 100.0
plot()
에서 abline()
까지가 반복해서 나올 것이므로, source()
를 이용하기 위한 코드 작성.
readLines("horsekick_plot.R")
## [1] "options(digits=3)"
## [2] "plot(n.deaths, horsekick.cum, xlim=c(-0.5,4.5), ylim=c(50,100), xlab=\"사망자 수\", ylab=\"누적 백분률(%)\", yaxt=\"n\")"
## [3] "axis(side=2, at=horsekick.cum, labels=horsekick.cum)"
## [4] "abline(v=0:4, lty=2)"
격자선을 놓는 과정까지를 두 줄로 줄일 수 있다.
source("horsekick_plot.R")
apply(h.xy, 2, function(h) lines(x=h[1:2], y=h[3:4], lty=2))
## NULL
l.xy<-function(h) lines(x=h[1:2], y=h[3:4], lty=2)
누적분포 윗 면적을 명확히 표시하기 위하여 빗금
source("horsekick_plot.R")
apply(h.xy, 2, l.xy)
## NULL
polygon(c(0,1,1,0),c(rep(horsekick.cum[1],2), rep(horsekick.cum[5],2)), density=10, angle=15)
polygon(c(1,2,2,1),c(rep(horsekick.cum[2],2), rep(horsekick.cum[5],2)), density=10, angle=165)
polygon(c(2,3,3,2),c(rep(horsekick.cum[3],2), rep(horsekick.cum[5],2)), density=10, angle=30)
polygon(c(3,4,4,3),c(rep(horsekick.cum[4],2), rep(horsekick.cum[5],2)), density=10, angle=150)
polygon()
들을 한 줄로 정리하기 위하여 좌표들을 모으고,
poly.x<-matrix(c(0:3, rep(1:4, 2), 0:3), ncol=4, byrow=T)
poly.x
## [,1] [,2] [,3] [,4]
## [1,] 0 1 2 3
## [2,] 1 2 3 4
## [3,] 1 2 3 4
## [4,] 0 1 2 3
poly.y<-rbind(sapply(horsekick.cum[1:4], rep, times=2), matrix(rep(horsekick.cum[5], 8),nrow=2))
poly.y
## [,1] [,2] [,3] [,4]
## [1,] 51.4 83.9 95.4 99.3
## [2,] 51.4 83.9 95.4 99.3
## [3,] 100.0 100.0 100.0 100.0
## [4,] 100.0 100.0 100.0 100.0
poly.xy<-rbind(poly.x, poly.y)
poly.xy
## [,1] [,2] [,3] [,4]
## [1,] 0.0 1.0 2.0 3.0
## [2,] 1.0 2.0 3.0 4.0
## [3,] 1.0 2.0 3.0 4.0
## [4,] 0.0 1.0 2.0 3.0
## [5,] 51.4 83.9 95.4 99.3
## [6,] 51.4 83.9 95.4 99.3
## [7,] 100.0 100.0 100.0 100.0
## [8,] 100.0 100.0 100.0 100.0
lapply(data.frame(poly.xy), function(v) cbind(v[1:4], v[5:8]))
## $X1
## [,1] [,2]
## [1,] 0 51.4
## [2,] 1 51.4
## [3,] 1 100.0
## [4,] 0 100.0
##
## $X2
## [,1] [,2]
## [1,] 1 83.9
## [2,] 2 83.9
## [3,] 2 100.0
## [4,] 1 100.0
##
## $X3
## [,1] [,2]
## [1,] 2 95.4
## [2,] 3 95.4
## [3,] 3 100.0
## [4,] 2 100.0
##
## $X4
## [,1] [,2]
## [1,] 3 99.3
## [2,] 4 99.3
## [3,] 4 100.0
## [4,] 3 100.0
sapply()
, lapply()
, mapply()
를 적재적소에 활용하여 분량을 줄여보자.
source("horsekick_plot.R")
apply(h.xy, 2, l.xy)
## NULL
mapply(polygon, lapply(data.frame(poly.xy), function(v) cbind(v[1:4], v[5:8])), density=10, angle=c(15, 165, 30, 150))
## $X1
## NULL
##
## $X2
## NULL
##
## $X3
## NULL
##
## $X4
## NULL
누적분포 윗 면적이 곧 평균임을 나타내기 위해 막대를 다른 방향으로 집적. 우선 좌표들을 다시 정리하자.
poly.x.2<-matrix(c(rep(0,4), rep(1:4, 2), rep(0, 4)), ncol=4, byrow=TRUE)
poly.y.2<-rbind(sapply(horsekick.cum[1:4], rep, times=2), sapply(horsekick.cum[2:5], rep, times=2))
poly.xy.2<-rbind(poly.x.2, poly.y.2)
source("horsekick_plot.R")
apply(h.xy, 2, l.xy)
## NULL
mapply(polygon, lapply(data.frame(poly.xy.2), function(v) cbind(v[1:4], v[5:8])), density=10, angle=c(15, 165, 30, 150))
## $X1
## NULL
##
## $X2
## NULL
##
## $X3
## NULL
##
## $X4
## NULL
중간에 약간의 작업이 들어가긴 하지만 아래 소스코드와 비교하라.
options(digits=3)
plot(n.deaths, horsekick.cum, xlim=c(-0.5,4.5), ylim=c(50,100), xlab="사망자 수", ylab="누적 백분률(%)", yaxt="n")
axis(side=2,at=horsekick.cum, labels=horsekick.cum)
abline(v=0:5, lty=2)
apply(h.xy, 2, l.xy)
## NULL
polygon(c(0,1,1,0),c(rep(horsekick.cum[1],2),rep(horsekick.cum[2],2)), density=10, angle=15)
polygon(c(0,2,2,0),c(rep(horsekick.cum[2],2),rep(horsekick.cum[3],2)), density=10, angle=165)
polygon(c(0,3,3,0),c(rep(horsekick.cum[3],2),rep(horsekick.cum[4],2)), density=10, angle=30)
polygon(c(0,4,4,0),c(rep(horsekick.cum[4],2),rep(horsekick.cum[5],2)), density=10, angle=150)
이번에 쌓아놓은 막대 면적의 합은 평균을 계산한 것임을 확인.