# Load the CDC survey data frame and inspect its structure.
# NOTE(review): setwd() ties the script to one machine's directory layout;
# kept here because later relative paths may depend on it.
setwd("~/lecture/riii")
load("./Statistics/cdc.Rdata")
str(cdc)
## 'data.frame': 20000 obs. of 9 variables:
## $ genhlth : Factor w/ 5 levels "excellent","very good",..: 3 3 3 3 2 2 2 2 3 3 ...
## $ exerany : num 0 0 1 1 0 1 1 0 0 1 ...
## $ hlthplan: num 1 1 1 1 1 1 1 1 1 1 ...
## $ smoke100: num 0 1 1 0 0 0 0 0 1 0 ...
## $ height : num 70 64 60 66 61 64 71 67 65 70 ...
## $ weight : int 175 125 105 132 150 114 194 170 150 180 ...
## $ wtdesire: int 175 115 105 124 130 114 185 160 130 170 ...
## $ age : int 77 33 49 42 55 55 31 45 27 44 ...
## $ gender : Factor w/ 2 levels "m","f": 1 2 2 2 2 2 1 1 2 1 ...

# The three 0/1 indicator columns are stored as numeric; convert them to
# factors so they are treated as categories in tables and plots.
cdc[c("exerany", "hlthplan", "smoke100")] <-
  lapply(cdc[c("exerany", "hlthplan", "smoke100")], as.factor)
# (1) What share of respondents falls into each general-health category?
# Counts per genhlth level divided by the total number of rows.
table(cdc[["genhlth"]]) / nrow(cdc)
##
## excellent very good good fair poor
## 0.23285 0.34860 0.28375 0.10095 0.03385
# (2) What is the smoking rate among men and among women, respectively?
# The question asks for a rate *within* each gender, so the gender x smoke100
# table is normalised row-wise (margin = 1): each row sums to 1 and the "1"
# column is the share of that gender with smoke100 == 1. Dividing by
# nrow(cdc) instead would give joint shares of the whole sample, which is
# not a per-gender rate.
prop.table(table(cdc$gender, cdc$smoke100), margin = 1)
# Expected output (recomputed from the counts behind the previously recorded
# joint table: m 4547/5022, f 6012/4419):
##
## 0 1
## m 0.4751803 0.5248197
## f 0.5763589 0.4236411
# (3) Draw a histogram of age.
library("ggplot2")
## Warning: package 'ggplot2' was built under R version 3.5.2
# Base plot object mapping age to the x axis; the histogram layer is added
# on top and the result is auto-printed at top level.
g <- ggplot(data = cdc, mapping = aes(x = age))
g + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# (4) Draw box plots of BMI for each general-health group.
# - Note 1: imperial BMI formula: weight / height^2 * 703
# - Note 2: geom_boxplot() from ggplot2 can be used
# Add the derived bmi column to cdc, then plot one box per genhlth level.
cdc[["bmi"]] <- with(cdc, weight / height^2 * 703)
g <- ggplot(data = cdc, mapping = aes(x = genhlth, y = bmi))
g + geom_boxplot()

# (5) What are the correlation coefficients among height, weight, and age?
# cor() on the three-column subset returns the pairwise correlation matrix.
cor(cdc[c("height", "weight", "age")])
## height weight age
## height 1.0000000 0.555322192 -0.125181791
## weight 0.5553222 1.000000000 0.001608902
## age -0.1251818 0.001608902 1.000000000