# Switch to the course directory, load the saved workspace that provides the
# `cdc` data frame, and print its structure.
# NOTE(review): setwd() with a home-relative path ties this script to one
# machine's directory layout; kept because later relative paths may rely on it.
setwd('~/lecture/riii')
load('./Statistics/cdc.Rdata')
str(cdc)
## 'data.frame':    20000 obs. of  9 variables:
##  $ genhlth : Factor w/ 5 levels "excellent","very good",..: 3 3 3 3 2 2 2 2 3 3 ...
##  $ exerany : num  0 0 1 1 0 1 1 0 0 1 ...
##  $ hlthplan: num  1 1 1 1 1 1 1 1 1 1 ...
##  $ smoke100: num  0 1 1 0 0 0 0 0 1 0 ...
##  $ height  : num  70 64 60 66 61 64 71 67 65 70 ...
##  $ weight  : int  175 125 105 132 150 114 194 170 150 180 ...
##  $ wtdesire: int  175 115 105 124 130 114 185 160 130 170 ...
##  $ age     : int  77 33 49 42 55 55 31 45 27 44 ...
##  $ gender  : Factor w/ 2 levels "m","f": 1 2 2 2 2 2 1 1 2 1 ...
# exerany, hlthplan and smoke100 are stored as numeric 0/1 (see str() output
# above); convert all three to factors in one assignment.
cdc[c('exerany', 'hlthplan', 'smoke100')] <- lapply(
  cdc[c('exerany', 'hlthplan', 'smoke100')],
  as.factor
)
# (1) What proportion of respondents falls into each general-health category?
# Count the levels of `genhlth`, then divide by the number of rows in `cdc`.

table(cdc[['genhlth']]) / nrow(cdc)
## 
## excellent very good      good      fair      poor 
##   0.23285   0.34860   0.28375   0.10095   0.03385
# (2) What is the smoking rate among men and among women?
# Cross-tabulate gender (rows) against smoke100 (columns) and normalise each
# row to sum to 1 (margin = 1), so every cell is a share *within that gender*.
# Dividing the table by nrow(cdc) would instead give each cell's share of the
# whole sample, which is not a per-gender rate.
# Presumably smoke100 == 1 marks the smokers -- confirm against the codebook.

prop.table(table(cdc$gender, cdc$smoke100), margin = 1)
# The output below is derived from the counts of the originally recorded run
# (m: 4547 / 5022, f: 6012 / 4419); re-run to refresh.
##    
##             0         1
##   m 0.4751803 0.5248197
##   f 0.5763589 0.4236411
# (3) Draw a histogram of age.
# The plot object is kept in `g` and printed with a histogram layer added;
# no bin settings are passed, hence the stat_bin() message recorded below.

library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.5.2
g <- ggplot(data = cdc, mapping = aes(x = age))
g + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# (4) Draw box plots of BMI for each general-health group.
# - Note 1: imperial BMI formula: weight / height^2 * 703
# - Note 2: geom_boxplot() from ggplot2 can be used.

# Add the BMI column to `cdc`, evaluated column-wise inside the data frame.
cdc$bmi <- with(cdc, weight / height^2 * 703)
g <- ggplot(data = cdc, mapping = aes(x = genhlth, y = bmi))
g + geom_boxplot()

# (5) What are the pairwise correlation coefficients of height, weight and age?
# Select the three columns by name and pass the resulting data frame to cor().
cor(cdc[c('height', 'weight', 'age')])
##            height      weight          age
## height  1.0000000 0.555322192 -0.125181791
## weight  0.5553222 1.000000000  0.001608902
## age    -0.1251818 0.001608902  1.000000000