讀取小學生實驗結果csv檔案

# Load the survey results from the CSV file, decoding it as UTF-8.
data <- read.csv(
  file = "pho_survey_small.csv",
  fileEncoding = "UTF-8"
)

進行資料探索

# For every question (col_name): the number of distinct answer options (cnt),
# the most frequently chosen option (common) and its percentage (common_pcnt).
inspect_cat(data)
## # A tibble: 37 x 5
##    col_name   cnt common common_pcnt levels              
##    <chr>    <int> <chr>        <dbl> <named list>        
##  1 A01          1 s              100 <tibble[,3] [1 x 3]>
##  2 A02          3 s               70 <tibble[,3] [3 x 3]>
##  3 A03          4 s               50 <tibble[,3] [4 x 3]>
##  4 A04          3 s               60 <tibble[,3] [3 x 3]>
##  5 A05          3 s               60 <tibble[,3] [3 x 3]>
##  6 A06          2 s               80 <tibble[,3] [2 x 3]>
##  7 A07          3 not             40 <tibble[,3] [3 x 3]>
##  8 A08          3 s               40 <tibble[,3] [3 x 3]>
##  9 A09          4 s               60 <tibble[,3] [4 x 3]>
## 10 A10          3 s               80 <tibble[,3] [3 x 3]>
## # ... with 27 more rows
# Keep only columns 2 to 37 (per the original note, this removes 'name'),
# summarise the categorical levels of those columns, and record their names.
df <- inspect_cat(data[2:37])
questions <- names(data[2:37])

以長條圖顯示各題組(set)中,各題有出現的選項及其比例

# To look at a single question: df$levels$B05
# To restrict the plot to frequent options: show_plot(high_cardinality = 4)
# (presumably keeps options occurring at least 4 times — confirm against the
# inspectdf documentation)

# Plotting helper: draws the answer-option proportions for one question set.
#   df   - table with a `col_name` column (here, the result of inspect_cat())
#   setQ - pattern passed to str_detect() to pick the rows whose `col_name`
#          matches (e.g. 'A' for the A-questions)
# The value of show_plot() on the selected rows is returned.
getPropPlot <- function(df, setQ) {
  set_rows <- filter(df, str_detect(col_name, setQ))
  show_plot(set_rows)
}

# Draw one plot per question set (A, B, C). The calls stay at top level so
# that each returned plot is printed.
getPropPlot(df, setQ = "A")

getPropPlot(df, setQ = "B")

getPropPlot(df, setQ = "C")