도수분포표

# Frequency table of chest sizes: one row per size (33 to 48) with the
# number of observations recorded at that size.
chest <- 33L:48L
freq <- c(
  3, 18, 81, 185, 420, 749, 1073, 1079,
  934, 658, 370, 92, 50, 21, 4, 1
)
chest.table <- data.frame(Chest = chest, Freq = freq)
# Print the full table.
chest.table
##    Chest Freq
## 1     33    3
## 2     34   18
## 3     35   81
## 4     36  185
## 5     37  420
## 6     38  749
## 7     39 1073
## 8     40 1079
## 9     41  934
## 10    42  658
## 11    43  370
## 12    44   92
## 13    45   50
## 14    46   21
## 15    47    4
## 16    48    1
# Check the structure: Chest is stored as integer, Freq as double.
str(chest.table)
## 'data.frame':    16 obs. of  2 variables:
##  $ Chest: int  33 34 35 36 37 38 39 40 41 42 ...
##  $ Freq : num  3 18 81 185 420 ...

확률 히스토그램

# Probability bar chart: bar heights are relative frequencies.
# The divisor is computed from the table instead of being typed in as
# 5738, so the heights still sum to 1 if the frequencies are edited.
rel.freq <- chest.table$Freq / sum(chest.table$Freq)
barplot(rel.freq)

# Same heights, with each bar labelled by its chest size (taken from the
# table rather than a hard-coded 33:48), no gap between bars, white fill.
barplot(rel.freq, names.arg = chest.table$Chest, space = 0, col = "white")

기초통계와 표준편차 계산

# Expand the frequency table into raw data: every chest size is repeated
# as many times as its frequency, giving one value per observation.
chest.long <- with(chest.table, rep(Chest, times = Freq))
str(chest.long)
##  int [1:5738] 33 33 33 34 34 34 34 34 34 34 ...

chest.long을 이용하여 기초통계와 표준편차를 계산하면 다음과 같다.

# Five-number summary plus the mean of the expanded observations.
summary(chest.long)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   33.00   38.00   40.00   39.83   41.00   48.00
# Sample standard deviation of the same observations.
sd(chest.long)
## [1] 2.049616

히스토그램으로 나타내기

# Default histogram: bar heights are counts.
hist(chest.long)

# Density-scaled histogram: bar areas sum to 1.
# freq = FALSE is the documented argument; `prob = TRUE` only works through
# partial matching to `probability`, which warns when
# options(warnPartialMatchArgs = TRUE) is set.
hist(chest.long, freq = FALSE)

히스토그램의 내부 계산

# hist() returns the bins it computed as a "histogram" object;
# plot = FALSE asks for that object without drawing anything.
# FALSE is spelled out because F is an ordinary variable that can be
# reassigned, whereas FALSE is a reserved word.
# With the default breaks at whole numbers there are only 15 bins: the
# first count (21) combines the 3 + 18 observations at sizes 33 and 34.
h.chest <- hist(chest.long, plot = FALSE)
h.chest
## $breaks
##  [1] 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48
## 
## $counts
##  [1]   21   81  185  420  749 1073 1079  934  658  370   92   50   21    4
## [15]    1
## 
## $density
##  [1] 0.0036598118 0.0141164169 0.0322411990 0.0731962356 0.1305332869
##  [6] 0.1869989543 0.1880446148 0.1627744859 0.1146741025 0.0644823980
## [11] 0.0160334611 0.0087138376 0.0036598118 0.0006971070 0.0001742768
## 
## $mids
##  [1] 33.5 34.5 35.5 36.5 37.5 38.5 39.5 40.5 41.5 42.5 43.5 44.5 45.5 46.5
## [15] 47.5
## 
## $xname
## [1] "chest.long"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
# Breaks at the half-integers 32.5, 33.5, ..., 48.5 give every whole-number
# size its own bin. freq = FALSE (the documented form of `prob = TRUE`)
# plots densities instead of counts.
hist(chest.long, freq = FALSE, breaks = 32.5:48.5)

# Titles and axis labels reused by the later histograms.
main.title <- "Fitting Normal Distribution"
sub.title <- "Chest Circumferences of Scottish Soldiers"
x.lab <- "Inches"
y.lab <- "Proportion"
hist(chest.long, breaks = 32.5:48.5, freq = FALSE, main = main.title,
     sub = sub.title, xlab = x.lab, ylab = y.lab)

평균 \(\pm\) 표준편차의 의미

# Density histogram with dashed red lines at 38 and 42, i.e. the mean
# (39.83) minus/plus one standard deviation (2.05), rounded to whole inches.
# TRUE/FALSE are spelled out (T and F are reassignable variables) and
# freq = FALSE replaces the partially matched `prob` argument.
hist(chest.long, breaks = 32.5:48.5, freq = FALSE, main = main.title,
     sub = sub.title, xlab = x.lab, ylab = y.lab)
abline(v = c(38, 42), lty = 2, col = "red")

# Bin details for the half-integer breaks, computed without drawing.
# Each bin is centred on a whole-number size (see $mids).
h.chest.2 <- hist(chest.long, breaks = 32.5:48.5, plot = FALSE)
h.chest.2
## $breaks
##  [1] 32.5 33.5 34.5 35.5 36.5 37.5 38.5 39.5 40.5 41.5 42.5 43.5 44.5 45.5
## [15] 46.5 47.5 48.5
## 
## $counts
##  [1]    3   18   81  185  420  749 1073 1079  934  658  370   92   50   21
## [15]    4    1
## 
## $density
##  [1] 0.0005228303 0.0031369815 0.0141164169 0.0322411990 0.0731962356
##  [6] 0.1305332869 0.1869989543 0.1880446148 0.1627744859 0.1146741025
## [11] 0.0644823980 0.0160334611 0.0087138376 0.0036598118 0.0006971070
## [16] 0.0001742768
## 
## $mids
##  [1] 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48
## 
## $xname
## [1] "chest.long"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
# Bins 6 to 10 are the ones centred on 38, 39, 40, 41 and 42.
h.chest.2$density[6:10]
## [1] 0.1305333 0.1869990 0.1880446 0.1627745 0.1146741
y <- h.chest.2$density[6:10]
# Outline of the region between x = 38 and x = 42 under the histogram.
# The first six points run left to right along the baseline (y = 0); the
# remaining points come back right to left along the bar tops, visiting
# each bar edge twice to draw the steps, and finish at (38, 0).
x.coord <- c(38, 38.5:41.5, 42, 42, rep(41.5:38.5, each = 2), 38, 38)
y.coord <- c(rep(0, 6), rep(rev(y), each = 2), 0)
hist(chest.long, breaks = 32.5:48.5, freq = FALSE, main = main.title,
     sub = sub.title, xlab = x.lab, ylab = y.lab)
# Hatch the outlined region.
polygon(x.coord, y.coord, density = 20)

# Area of the hatched region = sum of (width * height) over its five bars.
# The baseline points give the bar edges; the two end bars are cut at 38
# and 42, so they are only 0.5 wide.
x.coord[1:6]
## [1] 38.0 38.5 39.5 40.5 41.5 42.0
y
## [1] 0.1305333 0.1869990 0.1880446 0.1627745 0.1146741
diff(x.coord[1:6])
## [1] 0.5 1.0 1.0 1.0 0.5
diff(x.coord[1:6]) * y
## [1] 0.06526664 0.18699895 0.18804461 0.16277449 0.05733705
# About 66% of the histogram's area lies between 38 and 42.
sum(diff(x.coord[1:6]) * y)
## [1] 0.6604217

정규분포 밀도함수와 비교

# Overlay the normal density whose mean and standard deviation match the
# sample, to compare its shape with the density histogram.
mean.chest <- mean(chest.long)
sd.chest <- sd(chest.long)
# 1000 evenly spaced x values across the histogram's range.
# `length.out` is written in full: `length =` only works through partial
# argument matching.
x <- seq(32.5, 48.5, length.out = 1000)
y.norm <- dnorm(x, mean = mean.chest, sd = sd.chest)
# freq = FALSE (the documented form of `prob = TRUE`) puts the bars on the
# density scale, the same scale as dnorm().
hist(chest.long, breaks = 32.5:48.5, freq = FALSE, main = main.title,
     sub = sub.title, xlab = x.lab, ylab = y.lab)
lines(x, y.norm, col = "red")

x축의 눈금 조정

# Draw the histogram without its default axes, then add them by hand so the
# x-axis can carry ticks at every even value from 32 to 48.
# axes = FALSE is spelled out (F is a reassignable variable) and
# freq = FALSE replaces the partially matched `prob` argument.
hist(chest.long, breaks = 32.5:48.5, freq = FALSE, main = main.title,
     sub = sub.title, xlab = x.lab, ylab = y.lab, axes = FALSE)
# Tick positions are defined once and reused for the labels.
x.ticks <- seq(32, 48, by = 2)
axis(side = 1, at = x.ticks, labels = as.character(x.ticks))
# Default y-axis.
axis(side = 2)