例として,子供と大人の身長の架空データを検討しよう.大人は子供時代の1.8倍高くなると想定し,以下のようなデータを生成する.
childheight<-rnorm(100)*5+90
adultheight<-childheight*1.8
height <- cbind(child = childheight,adult = adultheight)
boxplot(height,col=rainbow(2))
子どもと大人の身長の平均,標準偏差(ただし\(n-1\)で割る標本標準偏差),そして,標準偏差を平均で割った変動係数を計算する.
heightsum<-rbind(apply(height,2,mean),
apply(height,2,sd),
apply(height,2,function(x) sd(x)/mean(x)))
rownames(heightsum)<-c("mean","sd","cv")
heightsum
## child adult
## mean 89.60355208 161.28639374
## sd 5.32448446 9.58407203
## cv 0.05942269 0.05942269
標準偏差は大人の方が大きいが,データの相対的な大きさを考慮した変動係数では,両者のばらつき具合は同じであるという結論になる.
同じことをiris
データでやる.
boxplot(Petal.Width ~ Species, data = iris,col=rainbow(3))
irissum<-rbind(as.vector(by(iris$Petal.Width, iris$Species, mean)),
as.vector(by(iris$Petal.Width, iris$Species, sd)),
as.vector(by(iris$Petal.Width, iris$Species,
function(x) sd(x)/mean(x))))
colnames(irissum)<-levels(iris$Species)
rownames(irissum)<-c("mean","sd","cv")
irissum
## setosa versicolor virginica
## mean 0.2460000 1.3260000 2.0260000
## sd 0.1053856 0.1977527 0.2746501
## cv 0.4283967 0.1491348 0.1355627
cars$speed
データを使う.plot用準備.
y0<-rep(0,length(cars$speed))
y1<-rep(1,length(cars$speed))
データに10を足す.
xp10<-cars$speed+10
plot(cars$speed,y1, axes = F,xlab="",ylab="",col="red",
xlim=c(0,60), ylim=c(0,1))
par(new=T)
plot(mean(cars$speed),1, axes = F,xlab="",ylab="",col="blue",
xlim=c(0,60), ylim=c(0,1),pch=19)
par(new=T)
plot(xp10,y0, axes = F,xlab="",ylab="",col="red",
xlim=c(0,60), ylim=c(0,1))
par(new=T)
plot(mean(xp10),0, axes = F,xlab="",ylab="",col="blue",
xlim=c(0,60), ylim=c(0,1),pch=19)
abline(h =0)
abline(h =1)
segments(min(cars$speed),1,min(xp10),0,col="red")
segments(max(cars$speed),1,max(xp10),0,col="red")
segments(mean(cars$speed),1,mean(xp10),0,col="blue")
データを2倍する.
xt2<-2*cars$speed
plot(cars$speed,y1, axes = F,xlab="",ylab="",col="red",
xlim=c(0,60), ylim=c(0,1))
par(new=T)
plot(mean(cars$speed),1, axes = F,xlab="",ylab="",col="blue",
xlim=c(0,60), ylim=c(0,1),pch=19)
par(new=T)
plot(xt2,y0, axes = F,xlab="",ylab="",col="red",
xlim=c(0,60), ylim=c(0,1))
par(new=T)
plot(mean(xt2),0, axes = F,xlab="",ylab="",col="blue",
xlim=c(0,60), ylim=c(0,1),pch=19)
abline(h =0)
abline(h =1)
segments(min(cars$speed),1,min(xt2),0,col="red")
segments(max(cars$speed),1,max(xt2),0,col="red")
segments(mean(cars$speed),1,mean(xt2),0,col="blue")
データを2倍して10を足す.
xt2p10<-2*cars$speed+10
plot(cars$speed,y1, axes = F,xlab="",ylab="",col="red",
xlim=c(0,60), ylim=c(0,1))
par(new=T)
plot(mean(cars$speed),1, axes = F,xlab="",ylab="",col="blue",
xlim=c(0,60), ylim=c(0,1),pch=19)
par(new=T)
plot(xt2p10,y0, axes = F,xlab="",ylab="",col="red",
xlim=c(0,60), ylim=c(0,1))
par(new=T)
plot(mean(xt2p10),0, axes = F,xlab="",ylab="",col="blue",
xlim=c(0,60), ylim=c(0,1),pch=19)
abline(h =0)
abline(h =1)
segments(min(cars$speed),1,min(xt2p10),0,col="red")
segments(max(cars$speed),1,max(xt2p10),0,col="red")
segments(mean(cars$speed),1,mean(xt2p10),0,col="blue")
plot用準備.
y01<-rep(0.1,length(cars$speed))
y1<-rep(1,length(cars$speed))
ym1<-rep(-1,length(cars$speed))
ym01<-rep(-0.1,length(cars$speed))
scale
で標準化を実行(ただし,標準偏差は\(n-1\)で割る標本標準偏差が使われる).
xstd<-(cars$speed-mean(cars$speed))/sd(cars$speed)
round(mean(xstd),10)
## [1] 0
sd(xstd)
## [1] 1
cars$speed
とcars$dist
を標準化して比較する.
plot(cars$speed,y1, axes = F,xlab="",ylab="",col="red",
xlim=c(-3,120), ylim=c(-1,1))
par(new=T)
plot(mean(cars$speed),1, axes = F,xlab="",ylab="",col="blue",
xlim=c(-3,120), ylim=c(-1,1),pch=19)
par(new=T)
plot(xstd,y01, axes = F,xlab="",ylab="",col="red",
xlim=c(-3,120), ylim=c(-1,1))
par(new=T)
plot(mean(xstd),0.1, axes = F,xlab="",ylab="",col="blue",
xlim=c(-3,120), ylim=c(-1,1),pch=19)
abline(h =0.1)
abline(h =1)
segments(min(cars$speed),1,min(xstd),0.1,col="red")
segments(max(cars$speed),1,max(xstd),0.1,col="red")
segments(mean(cars$speed),1,mean(xstd),0.1,col="blue")
ystd<-(cars$dist-mean(cars$dist))/sd(cars$dist)
par(new=T)
plot(cars$dist,ym1, axes = F,xlab="",ylab="",col="red",
xlim=c(-3,120), ylim=c(-1,1))
par(new=T)
plot(mean(cars$dist),-1, axes = F,xlab="",ylab="",col="blue",
xlim=c(-3,120), ylim=c(-1,1),pch=19)
par(new=T)
plot(ystd,ym01, axes = F,xlab="",ylab="",col="red",
xlim=c(-3,120), ylim=c(-1,1))
par(new=T)
plot(mean(ystd),-0.1, axes = F,xlab="",ylab="",col="blue",
xlim=c(-3,120), ylim=c(-1,1),pch=19)
abline(h =-0.1)
abline(h =-1)
segments(min(cars$dist),-1,min(ystd),-0.1,col="red")
segments(max(cars$dist),-1,max(ystd),-0.1,col="red")
segments(mean(cars$dist),-1,mean(ystd),-0.1,col="blue")
もうちょっとクローズアップする.青縦線は平均(0),赤色点線は\(1\sigma\)点(\(\pm 1\)),\(2\sigma\)点(\(\pm 2\)).
plot(xstd,y01, axes = F,xlab="",ylab="",col="red",
xlim=c(-3,3), ylim=c(-0.2,0.2))
par(new=T)
plot(mean(xstd),0.1, axes = F,xlab="",ylab="",col="blue",
xlim=c(-3,3), ylim=c(-0.2,0.2),pch=19)
abline(h =0.1)
par(new=T)
plot(ystd,ym01, axes = F,xlab="",ylab="",col="red",
xlim=c(-3,3), ylim=c(-0.2,0.2))
par(new=T)
plot(mean(ystd),-0.1, axes = F,xlab="",ylab="",col="blue",
xlim=c(-3,3), ylim=c(-0.2,0.2),pch=19)
abline(h =-0.1)
abline(v = 0,col="blue")
abline(v = -1,col="red",lty= 2)
abline(v = -2,col="red",lty= 2)
abline(v = 1,col="red",lty= 2)
abline(v = 2,col="red",lty= 2)