変動係数

例として,子供と大人の身長の架空データを検討しよう.大人は子供時代の1.8倍高くなると想定し,以下のようなデータを生成する.

childheight<-rnorm(100)*5+90
adultheight<-childheight*1.8
height <- cbind(child = childheight,adult = adultheight)
boxplot(height,col=rainbow(2)) 

子どもと大人の身長の平均,標準偏差(ただし\(n-1\)で割る標本標準偏差),そして,標準偏差を平均で割った変動係数を計算する.

heightsum<-rbind(apply(height,2,mean),
      apply(height,2,sd),
      apply(height,2,function(x) sd(x)/mean(x)))
rownames(heightsum)<-c("mean","sd","cv")
heightsum
##            child        adult
## mean 89.60355208 161.28639374
## sd    5.32448446   9.58407203
## cv    0.05942269   0.05942269

標準偏差は大人の方が大きいが,データの相対的な大きさを考慮した変動係数では,両者のばらつき具合は同じであるという結論になる.

同じことをirisデータでやる.

boxplot(Petal.Width ~ Species, data = iris,col=rainbow(3))

irissum<-rbind(as.vector(by(iris$Petal.Width, iris$Species, mean)),
               as.vector(by(iris$Petal.Width, iris$Species, sd)),
               as.vector(by(iris$Petal.Width, iris$Species, 
                            function(x) sd(x)/mean(x))))
colnames(irissum)<-levels(iris$Species)
rownames(irissum)<-c("mean","sd","cv")
irissum
##         setosa versicolor virginica
## mean 0.2460000  1.3260000 2.0260000
## sd   0.1053856  0.1977527 0.2746501
## cv   0.4283967  0.1491348 0.1355627

一次変換

cars$speedデータを使う.plot用準備.

y0<-rep(0,length(cars$speed))
y1<-rep(1,length(cars$speed))

データに10を足す.

xp10<-cars$speed+10

plot(cars$speed,y1, axes = F,xlab="",ylab="",col="red",
     xlim=c(0,60), ylim=c(0,1))
par(new=T)
plot(mean(cars$speed),1, axes = F,xlab="",ylab="",col="blue",
     xlim=c(0,60), ylim=c(0,1),pch=19)
par(new=T)
plot(xp10,y0, axes = F,xlab="",ylab="",col="red",
     xlim=c(0,60), ylim=c(0,1))
par(new=T)
plot(mean(xp10),0, axes = F,xlab="",ylab="",col="blue",
     xlim=c(0,60), ylim=c(0,1),pch=19)
abline(h =0)
abline(h =1)
segments(min(cars$speed),1,min(xp10),0,col="red")
segments(max(cars$speed),1,max(xp10),0,col="red")
segments(mean(cars$speed),1,mean(xp10),0,col="blue")

データを2倍する.

xt2<-2*cars$speed

plot(cars$speed,y1, axes = F,xlab="",ylab="",col="red",
     xlim=c(0,60), ylim=c(0,1))
par(new=T)
plot(mean(cars$speed),1, axes = F,xlab="",ylab="",col="blue",
     xlim=c(0,60), ylim=c(0,1),pch=19)
par(new=T)
plot(xt2,y0, axes = F,xlab="",ylab="",col="red",
     xlim=c(0,60), ylim=c(0,1))
par(new=T)
plot(mean(xt2),0, axes = F,xlab="",ylab="",col="blue",
     xlim=c(0,60), ylim=c(0,1),pch=19)
abline(h =0)
abline(h =1)
segments(min(cars$speed),1,min(xt2),0,col="red")
segments(max(cars$speed),1,max(xt2),0,col="red")
segments(mean(cars$speed),1,mean(xt2),0,col="blue")

データを2倍して10を足す.

xt2p10<-2*cars$speed+10

plot(cars$speed,y1, axes = F,xlab="",ylab="",col="red",
     xlim=c(0,60), ylim=c(0,1))
par(new=T)
plot(mean(cars$speed),1, axes = F,xlab="",ylab="",col="blue",
     xlim=c(0,60), ylim=c(0,1),pch=19)
par(new=T)
plot(xt2p10,y0, axes = F,xlab="",ylab="",col="red",
     xlim=c(0,60), ylim=c(0,1))
par(new=T)
plot(mean(xt2p10),0, axes = F,xlab="",ylab="",col="blue",
     xlim=c(0,60), ylim=c(0,1),pch=19)
abline(h =0)
abline(h =1)
segments(min(cars$speed),1,min(xt2p10),0,col="red")
segments(max(cars$speed),1,max(xt2p10),0,col="red")
segments(mean(cars$speed),1,mean(xt2p10),0,col="blue")

標準化

plot用準備.

y01<-rep(0.1,length(cars$speed))
y1<-rep(1,length(cars$speed))
ym1<-rep(-1,length(cars$speed))
ym01<-rep(-0.1,length(cars$speed))

scaleで標準化を実行(ただし,標準偏差は\(n-1\)で割る標本標準偏差が使われる).

xstd<-(cars$speed-mean(cars$speed))/sd(cars$speed)
round(mean(xstd),10)
## [1] 0
sd(xstd)
## [1] 1

cars$speedcars$distを標準化して比較する.

plot(cars$speed,y1, axes = F,xlab="",ylab="",col="red",
     xlim=c(-3,120), ylim=c(-1,1))
par(new=T)
plot(mean(cars$speed),1, axes = F,xlab="",ylab="",col="blue",
     xlim=c(-3,120), ylim=c(-1,1),pch=19)
par(new=T)
plot(xstd,y01, axes = F,xlab="",ylab="",col="red",
     xlim=c(-3,120), ylim=c(-1,1))
par(new=T)
plot(mean(xstd),0.1, axes = F,xlab="",ylab="",col="blue",
     xlim=c(-3,120), ylim=c(-1,1),pch=19)
abline(h =0.1)
abline(h =1)
segments(min(cars$speed),1,min(xstd),0.1,col="red")
segments(max(cars$speed),1,max(xstd),0.1,col="red")
segments(mean(cars$speed),1,mean(xstd),0.1,col="blue")

ystd<-(cars$dist-mean(cars$dist))/sd(cars$dist)

par(new=T)
plot(cars$dist,ym1, axes = F,xlab="",ylab="",col="red",
     xlim=c(-3,120), ylim=c(-1,1))
par(new=T)
plot(mean(cars$dist),-1, axes = F,xlab="",ylab="",col="blue",
     xlim=c(-3,120), ylim=c(-1,1),pch=19)
par(new=T)
plot(ystd,ym01, axes = F,xlab="",ylab="",col="red",
     xlim=c(-3,120), ylim=c(-1,1))
par(new=T)
plot(mean(ystd),-0.1, axes = F,xlab="",ylab="",col="blue",
     xlim=c(-3,120), ylim=c(-1,1),pch=19)
abline(h =-0.1)
abline(h =-1)
segments(min(cars$dist),-1,min(ystd),-0.1,col="red")
segments(max(cars$dist),-1,max(ystd),-0.1,col="red")
segments(mean(cars$dist),-1,mean(ystd),-0.1,col="blue")

もうちょっとクローズアップする.青縦線は平均(0),赤色点線は\(1\sigma\)点(\(\pm 1\)),\(2\sigma\)点(\(\pm 2\)).

plot(xstd,y01, axes = F,xlab="",ylab="",col="red",
     xlim=c(-3,3), ylim=c(-0.2,0.2))
par(new=T)
plot(mean(xstd),0.1, axes = F,xlab="",ylab="",col="blue",
     xlim=c(-3,3), ylim=c(-0.2,0.2),pch=19)
abline(h =0.1)
par(new=T)
plot(ystd,ym01, axes = F,xlab="",ylab="",col="red",
     xlim=c(-3,3), ylim=c(-0.2,0.2))
par(new=T)
plot(mean(ystd),-0.1, axes = F,xlab="",ylab="",col="blue",
     xlim=c(-3,3), ylim=c(-0.2,0.2),pch=19)
abline(h =-0.1)
abline(v = 0,col="blue")
abline(v = -1,col="red",lty= 2)
abline(v = -2,col="red",lty= 2)
abline(v = 1,col="red",lty= 2)
abline(v = 2,col="red",lty= 2)