#下載資料:https://www.kaggle.com/hb20007/gender-classification?select=Transformed+Data+Set+-+Sheet1.csv
#檔名為"Transformed Data Set - Sheet1.csv"
#從電腦端上傳(upload)到rstudio cloud

#install.packages("readr")
library(readr)
# Load the survey responses. stringsAsFactors = TRUE turns every text column
# into a factor, which is why summary() below reports per-level counts.
x <- read.csv(
  file = "Transformed Data Set - Sheet1.csv",
  stringsAsFactors = TRUE
)
# Inspect the data: level counts for each column
summary(x)
##  Favorite.Color       Favorite.Music.Genre     Favorite.Beverage
##  Cool   :37     Electronic      : 8        Beer         :13     
##  Neutral: 7     Folk/Traditional: 4        Doesn't drink:14     
##  Warm   :22     Hip hop         : 8        Other        :11     
##                 Jazz/Blues      : 4        Vodka        : 9     
##                 Pop             :17        Whiskey      : 9     
##                 R&B and soul    : 6        Wine         :10     
##                 Rock            :19                             
##       Favorite.Soft.Drink Gender
##  7UP/Sprite     :13       F:33  
##  Coca Cola/Pepsi:32       M:33  
##  Fanta          :14             
##  Other          : 7             
##                                 
##                                 
## 
# Replace the original headers with short names. The assignment is positional,
# so it relies on the five columns arriving in the order shown above.
names(x) <- c("color", "music", "beverage", "drink", "gender")


# Question: do men and women differ in their beverage preferences?

# Contingency table of counts: table(rows, columns) -> gender by beverage
t <- table(x[["gender"]], x[["beverage"]])
t
##    
##     Beer Doesn't drink Other Vodka Whiskey Wine
##   F    6             5     7     4       5    6
##   M    7             9     4     5       4    4
# Convert the counts to proportions. No margin is given, so every cell is
# divided by the grand total: the whole table sums to 1, not each gender row.
(p.t <- prop.table(t))
##    
##           Beer Doesn't drink      Other      Vodka    Whiskey       Wine
##   F 0.09090909    0.07575758 0.10606061 0.06060606 0.07575758 0.09090909
##   M 0.10606061    0.13636364 0.06060606 0.07575758 0.06060606 0.06060606
# The same proportions expressed as percentages (not stored)
100 * p.t
##    
##          Beer Doesn't drink     Other     Vodka   Whiskey      Wine
##   F  9.090909      7.575758 10.606061  6.060606  7.575758  9.090909
##   M 10.606061     13.636364  6.060606  7.575758  6.060606  6.060606
# Overwrite p.t with the percentages rounded to two decimal places
p.t <- round(100 * p.t, digits = 2)
p.t
##    
##      Beer Doesn't drink Other Vodka Whiskey  Wine
##   F  9.09          7.58 10.61  6.06    7.58  9.09
##   M 10.61         13.64  6.06  7.58    6.06  6.06
# Default bar chart of the percentage table: one bar per column (beverage),
# with the rows (genders) stacked inside each bar
barplot(height = p.t)

# beside = TRUE draws the genders side by side instead of stacked
barplot(height = p.t, beside = TRUE)

# Same grouped chart, now with a legend and fill colors.
# The legend entries are the table's row names (the gender codes).
label <- rownames(p.t)
label
## [1] "F" "M"
barplot(
  height = p.t,
  beside = TRUE,
  col = 22:23, # numeric color codes, resolved through the current palette()
  legend.text = label
)

#畫圓餅圖


#畫圓餅圖並加上資料標籤