#下載資料:https://www.kaggle.com/hb20007/gender-classification?select=Transformed+Data+Set+-+Sheet1.csv
#檔名為"Transformed Data Set - Sheet1.csv"
#從電腦端上傳(upload)到rstudio cloud
#install.packages("readr")
library(readr)
# Load the survey data with base R's read.csv(); text columns are converted
# to factors so that summary() reports per-level counts.
x <- read.csv(
  file = "Transformed Data Set - Sheet1.csv",
  stringsAsFactors = TRUE
)
# Inspect the data: one frequency listing per factor column.
summary(x)
## Favorite.Color Favorite.Music.Genre Favorite.Beverage
## Cool :37 Electronic : 8 Beer :13
## Neutral: 7 Folk/Traditional: 4 Doesn't drink:14
## Warm :22 Hip hop : 8 Other :11
## Jazz/Blues : 4 Vodka : 9
## Pop :17 Whiskey : 9
## R&B and soul : 6 Wine :10
## Rock :19
## Favorite.Soft.Drink Gender
## 7UP/Sprite :13 F:33
## Coca Cola/Pepsi:32 M:33
## Fanta :14
## Other : 7
##
##
##
# Replace the original column headers with short lowercase names
# (same column order as in the file).
names(x) <- c(
  "color",
  "music",
  "beverage",
  "drink",
  "gender"
)
#問題:男女對顏色的喜好的差異?
#次數分配表
t <- table(x$gender, x$beverage)#table(列, 欄)
t
##
## Beer Doesn't drink Other Vodka Whiskey Wine
## F 6 5 7 4 5 6
## M 7 9 4 5 4 4
# Relative-frequency table: each count is divided by the grand total of the
# table (not by the per-gender row total), so all cells together sum to 1.
# NOTE(review): if group sizes ever differ, row-wise proportions may be the
# better basis for comparing genders -- confirm the intended denominator.
p.t <- t / sum(t)
p.t
##
## Beer Doesn't drink Other Vodka Whiskey Wine
## F 0.09090909 0.07575758 0.10606061 0.06060606 0.07575758 0.09090909
## M 0.10606061 0.13636364 0.06060606 0.07575758 0.06060606 0.06060606
# Show the proportions as percentages (unrounded).
100 * p.t
##
## Beer Doesn't drink Other Vodka Whiskey Wine
## F 9.090909 7.575758 10.606061 6.060606 7.575758 9.090909
## M 10.606061 13.636364 6.060606 7.575758 6.060606 6.060606
# Overwrite p.t with the percentages rounded to two decimal places;
# from here on p.t holds percentages, not proportions.
p.t <- round(100 * p.t, digits = 2)
p.t
##
## Beer Doesn't drink Other Vodka Whiskey Wine
## F 9.09 7.58 10.61 6.06 7.58 9.09
## M 10.61 13.64 6.06 7.58 6.06 6.06
# Bar charts of the percentage table.
# Default layout: one stacked bar per beverage column.
barplot(height = p.t)

# Grouped layout: the bars of each column are drawn side by side.
barplot(height = p.t, beside = TRUE)

# Grouped bar chart with a legend and explicit colors.
# The legend entries are the row names of the table (the gender levels).
label <- dimnames(p.t)[[1]]
label
## [1] "F" "M"
# Colors are given as palette indices 22 and 23, one per gender row.
# NOTE(review): these exceed the size of the default palette and presumably
# wrap around -- confirm the resulting colors are the intended ones.
barplot(
  height = p.t,
  beside = TRUE,
  legend.text = label,
  col = 22:23
)

#畫圓餅圖
#畫圓餅圖並加上資料標籤