자료 생성

# Rebuild the raw sample from its frequency table: every chest size from 33
# to 48 is repeated as many times as it was observed.  as.numeric() keeps the
# values stored as doubles.
chest.circum <- rep(
  as.numeric(33:48),
  times = c(3, 18, 81, 185, 420, 749, 1073, 1079,
            934, 658, 370, 92, 50, 21, 4, 1)
)

기초통계와 표준편차 계산

# Minimum, quartiles, median, mean and maximum of the sample.
summary(chest.circum)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   33.00   38.00   40.00   39.83   41.00   48.00
# Sample standard deviation (sd() uses the n - 1 denominator).
sd(chest.circum)
## [1] 2.049616

히스토그램으로 나타내기

# Default histogram: the bar heights are counts.
hist(chest.circum)

# Same histogram on the density scale, so the bar areas add up to 1.
# The argument is spelled out as `probability = TRUE`: `prob` only works
# through partial matching and `T` is an ordinary, reassignable variable.
hist(chest.circum, probability = TRUE)

히스토그램의 내부 계산

# Compute the histogram without drawing it (plot = FALSE) to inspect the bins
# that hist() picked.  The default breaks 33, 34, ..., 48 give only 15 bins
# for 16 distinct values: bins are right-closed and the first one also
# includes its left end, so the first count (21) lumps together the 3
# observations at 33 and the 18 observations at 34.
h.circum <- hist(chest.circum, plot = FALSE)
h.circum
## $breaks
##  [1] 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48
## 
## $counts
##  [1]   21   81  185  420  749 1073 1079  934  658  370   92   50   21    4
## [15]    1
## 
## $density
##  [1] 0.0036598118 0.0141164169 0.0322411990 0.0731962356 0.1305332869
##  [6] 0.1869989543 0.1880446148 0.1627744859 0.1146741025 0.0644823980
## [11] 0.0160334611 0.0087138376 0.0036598118 0.0006971070 0.0001742768
## 
## $mids
##  [1] 33.5 34.5 35.5 36.5 37.5 38.5 39.5 40.5 41.5 42.5 43.5 44.5 45.5 46.5
## [15] 47.5
## 
## $xname
## [1] "chest.circum"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
# Half-integer breaks (32.5, 33.5, ..., 48.5) centre one bin on every integer
# chest size.  `probability = TRUE` replaces the partially matched `prob=T`.
hist(chest.circum, probability = TRUE, breaks = 32.5:48.5)

# Titles and axis labels shared by the remaining plots.
main.title <- "Fitting Normal Distribution"
sub.title <- "Chest Circumferences of Scottish Soldiers"
x.lab <- "Inches"
y.lab <- "Proportion"
hist(chest.circum, breaks = 32.5:48.5, probability = TRUE,
     main = main.title, sub = sub.title, xlab = x.lab, ylab = y.lab)

평균 \(\pm\) 표준편차의 의미

# Density-scale histogram with dashed red lines at 38 and 42, i.e. the mean
# minus/plus one standard deviation (39.83 -/+ 2.05) rounded to whole inches.
hist(chest.circum, breaks = 32.5:48.5, probability = TRUE,
     main = main.title, sub = sub.title, xlab = x.lab, ylab = y.lab)
abline(v = c(38, 42), lty = 2, col = "red")

# Histogram object for the half-integer breaks, computed without plotting.
# Each of the 16 chest sizes now has its own bin (mids 33, ..., 48), so the
# counts reproduce the frequency table exactly.
h.chest <- hist(chest.circum, breaks = 32.5:48.5, plot = FALSE)
h.chest
## $breaks
##  [1] 32.5 33.5 34.5 35.5 36.5 37.5 38.5 39.5 40.5 41.5 42.5 43.5 44.5 45.5
## [15] 46.5 47.5 48.5
## 
## $counts
##  [1]    3   18   81  185  420  749 1073 1079  934  658  370   92   50   21
## [15]    4    1
## 
## $density
##  [1] 0.0005228303 0.0031369815 0.0141164169 0.0322411990 0.0731962356
##  [6] 0.1305332869 0.1869989543 0.1880446148 0.1627744859 0.1146741025
## [11] 0.0644823980 0.0160334611 0.0087138376 0.0036598118 0.0006971070
## [16] 0.0001742768
## 
## $mids
##  [1] 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48
## 
## $xname
## [1] "chest.circum"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
# Densities of bins 6 to 10, i.e. the bins centred on 38, 39, 40, 41 and 42.
h.chest$density[6:10]
## [1] 0.1305333 0.1869990 0.1880446 0.1627745 0.1146741
y <- h.chest$density[6:10]
# Outline of the histogram area between 38 and 42.  The first six points run
# along the x-axis from 38 to 42 (through the bin boundaries); the path then
# returns from right to left along the bar tops, using each bar height twice
# (right and left corner of the bar), and closes at (38, 0).
x.coord <- c(38, 38.5:41.5, 42, 42, rep(41.5:38.5, each = 2), 38, 38)
y.coord <- c(rep(0, 6), rep(rev(y), each = 2), 0)
hist(chest.circum, breaks = 32.5:48.5, probability = TRUE,
     main = main.title, sub = sub.title, xlab = x.lab, ylab = y.lab)
# Hatch the outlined region with shading lines.
polygon(x.coord, y.coord, density = 20)

# Bottom edge of the shaded polygon: 38, the four bin boundaries, and 42.
x.coord[1:6]
## [1] 38.0 38.5 39.5 40.5 41.5 42.0
# Bar heights (densities) of the five bins that overlap the 38-42 range.
y
## [1] 0.1305333 0.1869990 0.1880446 0.1627745 0.1146741
# Widths of the five slices: the two outer bins are cut in half at 38 and 42.
diff(x.coord[1:6])
## [1] 0.5 1.0 1.0 1.0 0.5
# Area of each slice = width * density.
diff(x.coord[1:6])*y
## [1] 0.06526664 0.18699895 0.18804461 0.16277449 0.05733705
# Total shaded area: the share of the histogram lying between 38 and 42.
sum(diff(x.coord[1:6])*y)
## [1] 0.6604217

data.frame으로 관리하기

# Frequency table: chest size and the number of observations of that size.
chest <- 33:48
freq <- c(3, 18, 81, 185, 420, 749, 1073, 1079, 934, 658, 370, 92, 50, 21, 4, 1)
sum(freq)
## [1] 5738
quetelet.chest <- data.frame(chest, freq)
# Relative frequency of each chest size.
quetelet.chest$prob <- quetelet.chest$freq / sum(quetelet.chest$freq)
# Expand the table back into one value per observation.  rep() is vectorised
# over `times`, so there is no need for a loop that grows the vector with c()
# and hard-codes the first value and the index offset.  as.numeric() keeps the
# result a double vector, as the loop-built version was.
quetelet.chest.vec <- as.numeric(
  rep(quetelet.chest$chest, times = quetelet.chest$freq)
)
length(quetelet.chest.vec)
## [1] 5738
# Density-scale histogram of the vector rebuilt from the data frame, with
# dashed red reference lines at 38 and 42.
hist(quetelet.chest.vec, breaks = 32.5:48.5, probability = TRUE,
     main = main.title, sub = sub.title, xlab = x.lab, ylab = y.lab)
abline(v = c(38, 42), lty = 2, col = "red")

정규분포 밀도함수와 비교

# Parameters of the fitted normal distribution: sample mean and sample sd.
mean.chest <- mean(quetelet.chest.vec)
sd.chest <- sd(quetelet.chest.vec)
# Fine grid over the plotted range; `length.out` is spelled out instead of
# relying on partial matching of `length`.
x <- seq(32.5, 48.5, length.out = 1000)
y.norm <- dnorm(x, mean = mean.chest, sd = sd.chest)
# The histogram must be on the density scale to be comparable with dnorm().
hist(quetelet.chest.vec, breaks = 32.5:48.5, probability = TRUE,
     main = main.title, sub = sub.title, xlab = x.lab, ylab = y.lab)
lines(x, y.norm, col = "red")

x축의 눈금 조정

# Draw the histogram without axes, then add them by hand so the x-axis is
# labelled at every second inch from 32 to 48.
hist(chest.circum, breaks = 32.5:48.5, probability = TRUE,
     main = main.title, sub = sub.title, xlab = x.lab, ylab = y.lab,
     axes = FALSE)
# Tick positions are defined once and reused for both `at` and `labels`.
x.ticks <- seq(32, 48, by = 2)
axis(side = 1, at = x.ticks, labels = as.character(x.ticks))
# Default y-axis.
axis(side = 2)