# 讀取小學生實驗結果的 CSV 檔案
# Load the survey responses. The encoding is stated explicitly so the file's
# non-ASCII text is decoded as UTF-8 instead of the platform's native encoding.
data <- read.csv("pho_survey_small.csv", fileEncoding = "UTF-8")
# 進行資料探索
# For each question (col_name): the number of distinct answer options (cnt),
# the most frequently chosen option (common) and its percentage share
# (printed as common_pcnt in the output below).
data %>% inspect_cat()
## # A tibble: 37 x 5
## col_name cnt common common_pcnt levels
## <chr> <int> <chr> <dbl> <named list>
## 1 A01 1 s 100 <tibble[,3] [1 x 3]>
## 2 A02 3 s 70 <tibble[,3] [3 x 3]>
## 3 A03 4 s 50 <tibble[,3] [4 x 3]>
## 4 A04 3 s 60 <tibble[,3] [3 x 3]>
## 5 A05 3 s 60 <tibble[,3] [3 x 3]>
## 6 A06 2 s 80 <tibble[,3] [2 x 3]>
## 7 A07 3 not 40 <tibble[,3] [3 x 3]>
## 8 A08 3 s 40 <tibble[,3] [3 x 3]>
## 9 A09 4 s 60 <tibble[,3] [4 x 3]>
## 10 A10 3 s 80 <tibble[,3] [3 x 3]>
## # ... with 27 more rows
# Columns 2-37 hold the answers; the first column ('name') is left out.
# The range is hard-coded, so it has to be kept in step with the CSV layout.
questions <- colnames(data[2:37])

# Per-question summary of the answer options, without the 'name' column.
df <- data[2:37] %>%
  inspect_cat()
# 以長條圖顯示各題目 set 中，有出現的選項的比例
# Inspect a single question: df$levels$B05
# Show only options that occur more than 4 times:
#   show_plot(high_cardinality = 4)

#' Plot the answer-option proportions for one set of questions
#'
#' Keeps the rows of an `inspect_cat()` summary whose `col_name` matches
#' `setQ` and passes them to `show_plot()`.
#'
#' @param df Summary table with a `col_name` column, as produced by
#'   `inspect_cat()`.
#' @param setQ Pattern handed to `str_detect()`. It is treated as a regular
#'   expression and matches anywhere in the column name, so use e.g. `"^A"`
#'   if only names *starting* with "A" are wanted.
#' @param ... Optional arguments forwarded unchanged to `show_plot()`, for
#'   example `high_cardinality = 4`. Omitting them gives the default plot.
#' @return The value returned by `show_plot()`.
getPropPlot <- function(df, setQ, ...) {
  df %>%
    filter(str_detect(col_name, setQ)) %>%
    show_plot(...)
}
# Draw one plot per question set. These stay as separate top-level calls so
# that each returned plot is auto-printed.
getPropPlot(df = df, setQ = "A")
getPropPlot(df = df, setQ = "B")
getPropPlot(df = df, setQ = "C")