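# Data source: https://www.kaggle.com/hb20007/gender-classification?select=Transformed+Data+Set+-+Sheet1.csv
# The downloaded file is named "Transformed Data Set - Sheet1.csv"; rename it to "favorite.csv", which is the name read.csv() expects below.
# Upload the file from your computer to RStudio Cloud.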

# Install readr only when it is missing, so that re-running the script does
# not re-download the package (or fail when there is no network access).
if (!requireNamespace("readr", quietly = TRUE)) {
  install.packages("readr")
}
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.3'
## (as 'lib' is unspecified)
library(readr) # attach the package
# Read the data into a data frame named "x"; text columns are converted to
# factors. "favorite.csv" must exist in the working directory.
# NOTE(review): read.csv() is base R (utils); the readr equivalent would be
# read_csv(), which is not used here.
x <- read.csv("favorite.csv", stringsAsFactors = TRUE)
# Show the first 10 rows
head(x, 10)
##    Favorite.Color Favorite.Music.Genre Favorite.Beverage Favorite.Soft.Drink
## 1            Cool                 Rock             Vodka          7UP/Sprite
## 2         Neutral              Hip hop             Vodka     Coca Cola/Pepsi
## 3            Warm                 Rock              Wine     Coca Cola/Pepsi
## 4            Warm     Folk/Traditional           Whiskey               Fanta
## 5            Cool                 Rock             Vodka     Coca Cola/Pepsi
## 6            Warm           Jazz/Blues     Doesn't drink               Fanta
## 7            Cool                  Pop              Beer     Coca Cola/Pepsi
## 8            Warm                  Pop           Whiskey               Fanta
## 9            Warm                 Rock             Other          7UP/Sprite
## 10        Neutral                  Pop              Wine     Coca Cola/Pepsi
##    Gender
## 1       F
## 2       F
## 3       F
## 4       F
## 5       F
## 6       F
## 7       F
## 8       F
## 9       F
## 10      F
# Inspect the structure of the data frame, then a per-column summary
utils::str(x)
## 'data.frame':    66 obs. of  5 variables:
##  $ Favorite.Color      : Factor w/ 3 levels "Cool","Neutral",..: 1 2 3 3 1 3 1 3 3 2 ...
##  $ Favorite.Music.Genre: Factor w/ 7 levels "Electronic","Folk/Traditional",..: 7 3 7 2 7 4 5 5 7 5 ...
##  $ Favorite.Beverage   : Factor w/ 6 levels "Beer","Doesn't drink",..: 4 4 6 5 4 2 1 5 3 6 ...
##  $ Favorite.Soft.Drink : Factor w/ 4 levels "7UP/Sprite","Coca Cola/Pepsi",..: 1 2 2 3 2 3 2 3 1 2 ...
##  $ Gender              : Factor w/ 2 levels "F","M": 1 1 1 1 1 1 1 1 1 1 ...
print(summary(x))
##  Favorite.Color       Favorite.Music.Genre     Favorite.Beverage
##  Cool   :37     Electronic      : 8        Beer         :13     
##  Neutral: 7     Folk/Traditional: 4        Doesn't drink:14     
##  Warm   :22     Hip hop         : 8        Other        :11     
##                 Jazz/Blues      : 4        Vodka        : 9     
##                 Pop             :17        Whiskey      : 9     
##                 R&B and soul    : 6        Wine         :10     
##                 Rock            :19                             
##       Favorite.Soft.Drink Gender
##  7UP/Sprite     :13       F:33  
##  Coca Cola/Pepsi:32       M:33  
##  Fanta          :14             
##  Other          : 7             
##                                 
##                                 
## 
# Replace the long CSV headers with short column names
names(x) # current column names
## [1] "Favorite.Color"       "Favorite.Music.Genre" "Favorite.Beverage"   
## [4] "Favorite.Soft.Drink"  "Gender"
# Names are assigned by position, so the order must match the listing above.
names(x) <- c("color", "music", "beverage", "drink", "gender")

### Categorical data analysis ###

# Question 1: how many respondents are female and how many are male?
table(x[["gender"]]) # counts per level
## 
##  F  M 
## 33 33
proportions(table(x[["gender"]])) # shares per level (fractions summing to 1)
## 
##   F   M 
## 0.5 0.5
# Question 2: which color / music / beverage / drink / gender is the favorite?
# (pick one variable to analyse; beverage is used here)
table(x[["beverage"]])
## 
##          Beer Doesn't drink         Other         Vodka       Whiskey 
##            13            14            11             9             9 
##          Wine 
##            10
proportions(table(x[["beverage"]]))
## 
##          Beer Doesn't drink         Other         Vodka       Whiskey 
##     0.1969697     0.2121212     0.1666667     0.1363636     0.1363636 
##          Wine 
##     0.1515152
# Question 3: do color preferences differ between genders?
# Cross-tabulation of counts (rows = gender, columns = color).
# The variable name `t` is also the name of base R's transpose function; it is
# kept because the following lines refer to it.
t <- table(x[["gender"]], x[["color"]])
print(t)
##    
##     Cool Neutral Warm
##   F   17       3   13
##   M   20       4    9
# Row proportions: margin = 1 makes each gender's row sum to 1
p.t1 <- proportions(t, margin = 1)
print(p.t1)
##    
##           Cool    Neutral       Warm
##   F 0.51515152 0.09090909 0.39393939
##   M 0.60606061 0.12121212 0.27272727
# Express the proportions as percentages (multiply by 100)
p.t2 <- 100 * p.t1
print(p.t2)
##    
##          Cool   Neutral      Warm
##   F 51.515152  9.090909 39.393939
##   M 60.606061 12.121212 27.272727
# Round to 2 decimal places
p.t3 <- round(p.t2, digits = 2)
print(p.t3)
##    
##      Cool Neutral  Warm
##   F 51.52    9.09 39.39
##   M 60.61   12.12 27.27
# Bar chart with the default layout: F and M are stacked within each color
barplot(height = p.t3)

# Grouped bar chart: F and M are drawn side by side within each color
barplot(height = p.t3, beside = TRUE)

# Add a legend and colors; the legend labels are the table's row names
dimnames(p.t3)[[1]]
## [1] "F" "M"
label <- dimnames(p.t3)[[1]]
label
## [1] "F" "M"
barplot(
  height = p.t3,
  col = c("pink", "navy"),
  legend.text = label,
  beside = TRUE
)