1. R datasets:: Nile

Aswan에서 측정한 Nile 강의 유량 자료 (R datasets:: Nile)의 줄기-잎 그림을 그리고 자료에 대하여 설명하여라.

summary(Nile)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   456.0   798.5   893.5   919.4  1032.5  1370.0
stem(Nile,2)
## 
##   The decimal point is 2 digit(s) to the right of the |
## 
##    4 | 6
##    5 | 
##    5 | 
##    6 | 
##    6 | 5899
##    7 | 0001234444
##    7 | 55667778
##    8 | 000011222233344
##    8 | 555556667779
##    9 | 00112222444
##    9 | 66678899
##   10 | 01222344
##   10 | 55
##   11 | 00012244
##   11 | 566678
##   12 | 1123
##   12 | 56
##   13 | 
##   13 | 7
stem(Nile[Nile != 456])
## 
##   The decimal point is 2 digit(s) to the right of the |
## 
##    6 | 5899
##    7 | 000123444455667778
##    8 | 000011222233344555556667779
##    9 | 0011222244466678899
##   10 | 0122234455
##   11 | 00012244566678
##   12 | 112356
##   13 | 7
# The flow value 456 is an outlier, as the stem-and-leaf display above suggests.

2. R datasets:: warpbreaks

R의 warpbreaks 자료를 이용하여 wool A, B 각각에 대해 breaks의 stem and leaf display를 그리고 분포의 특징을 비교 설명하여라.

woolA=warpbreaks[warpbreaks$wool == "A",]
woolB=warpbreaks[warpbreaks$wool == "B",]
stem(woolA$breaks)
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   1 | 0257888
##   2 | 114566689
##   3 | 00566
##   4 | 3
##   5 | 124
##   6 | 7
##   7 | 0
stem(woolB$breaks,0.5)
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   1 | 345566799
##   2 | 001146788999
##   3 | 199
##   4 | 124
# woolA의 줄기-잎 그림과 비교를 용이하게 하기 위해 줄기의 길이를 조정하였다.
stem(woolB$breaks)
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   1 | 34
##   1 | 5566799
##   2 | 00114
##   2 | 6788999
##   3 | 1
##   3 | 99
##   4 | 124

3. R datasets:: Seatbelts

  1. 운전자 1000명당, 운행거리 10000km 당 사망 운전자 수

\(killed = DriversKilled \times (drivers/1000) \times (kms/10000)\)

seatbelt.frame = data.frame(Seatbelts)
attach(seatbelt.frame)
killed = DriversKilled*(drivers/1000)*(kms/10000)
summary(killed)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   112.4   228.3   290.6   308.4   377.1   631.9
  2. 사망 운전자 수의 줄기 그림을 그리고 간단히 서술하여라
stem(killed)
## 
##   The decimal point is 2 digit(s) to the right of the |
## 
##   1 | 1334
##   1 | 55666667778888899999
##   2 | 00000000011111122222223333333333344444444
##   2 | 555555555666677777777777888888999
##   3 | 000000011112223333333334444
##   3 | 555566666677777788889999
##   4 | 00001111112223444444
##   4 | 55566666778999
##   5 | 12233
##   5 | 567
##   6 | 3
  3. 안전띠 법이 시행되기 전과 후의 사망 운전자 수의 줄기 그림을 각각 그리고 비교하여라
seatbelt0= seatbelt.frame[seatbelt.frame$law==0,]
killed0 = seatbelt0$DriversKilled*(seatbelt0$drivers/1000)*(seatbelt0$kms/10000) 
# 안전띠 법 시행 이전 사망 운전자 수

seatbelt1= seatbelt.frame[seatbelt.frame$law==1,]
killed1 = seatbelt1$DriversKilled*(seatbelt1$drivers/1000)*(seatbelt1$kms/10000) 
# 안전띠 법 시행 이후 사망 운전자 수


stem(killed0,0.5)
## 
##   The decimal point is 2 digit(s) to the right of the |
## 
##   1 | 1334566667788999
##   2 | 0000000111122222333333333334444444455555555566667777777777888888999
##   3 | 000001111223333333334444555566667777778889999
##   4 | 00001111112223444445556666677899
##   5 | 12233567
##   6 | 3
# 안전띠 법 시행 이후의 줄기-잎 그림과 비교를 용이하게 하기 위해 줄기의 단위를 일치시킴
stem(killed1)
## 
##   The decimal point is 2 digit(s) to the right of the |
## 
##   1 | 56788899
##   2 | 0011227
##   3 | 002668
##   4 | 49
stem(killed0)
## 
##   The decimal point is 2 digit(s) to the right of the |
## 
##   1 | 1334
##   1 | 566667788999
##   2 | 00000001111222223333333333344444444
##   2 | 55555555566667777777777888888999
##   3 | 000001111223333333334444
##   3 | 555566667777778889999
##   4 | 0000111111222344444
##   4 | 5556666677899
##   5 | 12233
##   5 | 567
##   6 | 3
stem(killed1,2)
## 
##   The decimal point is 2 digit(s) to the right of the |
## 
##   1 | 
##   1 | 56788899
##   2 | 001122
##   2 | 7
##   3 | 002
##   3 | 668
##   4 | 4
##   4 | 9

4. Stem-Leaf Plot vs. Histogram

5. R Code

# ---- 1. Nile: river-flow series ----

# Rule-of-thumb stem counts for a sample of this size (n = length(Nile))
log(length(Nile), base = 2) + 1    # Sturges: 1 + log2(n)
log(length(Nile), base = 10) * 10  # Dixon & Kronmal: 10 * log10(n)
sqrt(length(Nile)) * 2             # Velleman: 2 * sqrt(n)

# First look at the series: plot, then histogram
plot(x = Nile)

hist(x = Nile)

# Five-number summary plus mean
summary(object = Nile)

# Stem-and-leaf display of the full series, then again with the single
# observation equal to 456 left out
stem(x = Nile)
stem(x = Nile[!(Nile == 456)])


# ---- 2. warpbreaks: breaks for wool A versus wool B ----

# Print the full data set for reference
warpbreaks

# One data frame per wool type (all columns kept)
woolA <- warpbreaks[warpbreaks[["wool"]] == "A", ]
woolB <- warpbreaks[warpbreaks[["wool"]] == "B", ]

# Two-sample t-test on breaks, wool A against wool B; the result is stored
# in `test` and is not printed here
test <- t.test(woolA$breaks, woolB$breaks)

# Rule-of-thumb stem counts for wool A
log(nrow(woolA), base = 2) + 1    # Sturges: 1 + log2(n)
log(nrow(woolA), base = 10) * 10  # Dixon & Kronmal: 10 * log10(n)
sqrt(nrow(woolA)) * 2             # Velleman: 2 * sqrt(n)

summary(object = woolA)
stem(x = woolA$breaks)

# Rule-of-thumb stem counts for wool B
log(nrow(woolB), base = 2) + 1    # Sturges: 1 + log2(n)
log(nrow(woolB), base = 10) * 10  # Dixon & Kronmal: 10 * log10(n)
sqrt(nrow(woolB)) * 2             # Velleman: 2 * sqrt(n)

summary(object = woolB)

# Wool B at the default scale, then at half scale
stem(x = woolB$breaks)
stem(x = woolB$breaks, scale = 0.5)

 
#3 Seatbelts
help(Seatbelts)
## starting httpd help server ... done
help(UKDriverDeaths)

Seatbelts

# Work on a data-frame copy of the series so columns can be addressed by name.
seatbelt.frame <- data.frame(Seatbelts)
str(seatbelt.frame)

# Driver-death index used throughout this section:
#   DriversKilled * (drivers / 1000) * (kms / 10000)
#
# `df` is any data frame holding the columns DriversKilled, drivers and kms;
# the result is a numeric vector with one value per row of `df`.
#
# Columns are resolved inside `df` via with(). attach() is deliberately not
# used: re-running the script attaches a second copy of the frame, which
# masks the first one on the search path.
#
# NOTE(review): the task text describes this quantity as deaths "per 1000
# drivers, per 10000 km", yet the formula multiplies by the scaled columns
# instead of dividing. Confirm against the assignment before changing it,
# since every summary and display below depends on it.
killed_index <- function(df) {
  with(df, DriversKilled * (drivers / 1000) * (kms / 10000))
}

killed <- killed_index(seatbelt.frame)

# Number of stems
1 + log(length(killed), base = 2)   # Sturges
10 * log(length(killed), base = 10) # Dixon & Kronmal
2 * sqrt(length(killed))            # Velleman

summary(killed)
stem(killed)

# Rows observed before the seat-belt law took effect (law == 0)
seatbelt0 <- seatbelt.frame[seatbelt.frame$law == 0, ]
killed0 <- killed_index(seatbelt0)

1 + log(length(killed0), base = 2)   # Sturges
10 * log(length(killed0), base = 10) # Dixon & Kronmal
2 * sqrt(length(killed0))            # Velleman

summary(killed0)
# Half scale, first at the default width and then with width = 20
stem(killed0, scale = 0.5)
stem(killed0, scale = 0.5, width = 20)


# Rows observed after the seat-belt law took effect (law == 1)
seatbelt1 <- seatbelt.frame[seatbelt.frame$law == 1, ]
killed1 <- killed_index(seatbelt1)

1 + log(length(killed1), base = 2)   # Sturges
10 * log(length(killed1), base = 10) # Dixon & Kronmal
2 * sqrt(length(killed1))            # Velleman

summary(killed1)
# Default scale, then doubled scale
stem(killed1)
stem(killed1, scale = 2)