#下載資料:https://www.kaggle.com/hb20007/gender-classification?select=Transformed+Data+Set+-+Sheet1.csv
#檔名為"Transformed Data Set - Sheet1.csv"
#從電腦端上傳(upload)到rstudio cloud

# install.packages("readr")
library(readr)

# Read the survey data into a data frame named `x`.
# Note: read.csv() is provided by base R (utils), not by readr.
# stringsAsFactors = TRUE turns the text columns into factors, so that
# summary() below reports a count per category.
x <- read.csv(
  file = "Transformed Data Set - Sheet1.csv",
  stringsAsFactors = TRUE
)

# Inspect the data: per-column counts of each category.
summary(x)
##  Favorite.Color       Favorite.Music.Genre     Favorite.Beverage
##  Cool   :37     Electronic      : 8        Beer         :13     
##  Neutral: 7     Folk/Traditional: 4        Doesn't drink:14     
##  Warm   :22     Hip hop         : 8        Other        :11     
##                 Jazz/Blues      : 4        Vodka        : 9     
##                 Pop             :17        Whiskey      : 9     
##                 R&B and soul    : 6        Wine         :10     
##                 Rock            :19                             
##       Favorite.Soft.Drink Gender
##  7UP/Sprite     :13       F:33  
##  Coca Cola/Pepsi:32       M:33  
##  Fanta          :14             
##  Other          : 7             
##                                 
##                                 
## 
# Rename the five columns to short names (same order as in the CSV):
# favourite colour, music genre, beverage, soft drink, and gender.
names(x) <- c(
  "color",
  "music",
  "beverage",
  "drink",
  "gender"
)




#問題:男女對音樂類型的喜好的差異?

# Frequency table: rows are gender, columns are favourite music genre.
# Note that the name `t` masks base::t() (transpose) as a variable for the
# rest of the script.
t <- table(x[["gender"]], x[["music"]])
t
##    
##     Electronic Folk/Traditional Hip hop Jazz/Blues Pop R&B and soul Rock
##   F          2                2       1          3  13            2   10
##   M          6                2       7          1   4            4    9
# Relative-frequency table. With margin = NULL every cell is divided by the
# grand total, so the entries are shares of all respondents (not shares
# within each gender).
p.t <- prop.table(x = t, margin = NULL)
p.t
##    
##     Electronic Folk/Traditional    Hip hop Jazz/Blues        Pop R&B and soul
##   F 0.03030303       0.03030303 0.01515152 0.04545455 0.19696970   0.03030303
##   M 0.09090909       0.03030303 0.10606061 0.01515152 0.06060606   0.06060606
##    
##           Rock
##   F 0.15151515
##   M 0.13636364
# Express the proportions as percentages, rounded to two decimal places.
p.t <- round(100 * p.t, digits = 2)
p.t  # print the percentage table; the grouped bar charts are drawn next
##    
##     Electronic Folk/Traditional Hip hop Jazz/Blues   Pop R&B and soul  Rock
##   F       3.03             3.03    1.52       4.55 19.70         3.03 15.15
##   M       9.09             3.03   10.61       1.52  6.06         6.06 13.64
# Default bar chart: one stacked bar per music genre (genders stacked).
barplot(height = p.t)

# Grouped bar chart: the bars for each gender are drawn side by side
# within every music genre.
barplot(height = p.t, beside = TRUE)

# Add a legend and colours to the grouped bar chart.
# The row names of p.t label the legend entries (one entry per row/group).
label <- rownames(p.t)

# With beside = TRUE, barplot() recycles `col` across every bar in drawing
# order, while the legend only uses the first length(legend.text) colours.
# Passing more colours than there are rows therefore makes the same group
# change colour from one genre to the next and disagree with the legend.
# Supplying exactly one colour per row (palette indices starting at 6, as
# before) keeps each group the same colour everywhere and matches the legend.
barplot(p.t,
        beside = TRUE,
        legend.text = label,
        col = seq(from = 6, length.out = length(label)))