# Data source:
# https://www.kaggle.com/hb20007/gender-classification?select=Transformed+Data+Set+-+Sheet1.csv
# The downloaded file is named "Transformed Data Set - Sheet1.csv".
# Upload it from your computer to RStudio Cloud.
# NOTE(review): the script reads "favorite.csv", so the uploaded file presumably
# has to be renamed to that (or the path below adjusted) -- confirm.

# Install readr only when it is missing, so re-running the script does not
# reinstall the package (and hit the network) every time.
if (!requireNamespace("readr", quietly = TRUE)) {
  install.packages("readr")
  ## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.3'
  ## (as 'lib' is unspecified)
}
library(readr) # attach the package
# NOTE(review): read.csv() below is base R, not readr::read_csv(); nothing in
# this part of the script uses readr itself.

# Read the data into a data frame named "x"; text columns become factors.
data_file <- "favorite.csv"
if (!file.exists(data_file)) {
  # Fail early with an actionable message instead of a bare connection error.
  stop(
    "Cannot find '", data_file, "' in ", getwd(),
    ". Upload the Kaggle CSV and rename it to '", data_file, "'.",
    call. = FALSE
  )
}
x <- read.csv(data_file, stringsAsFactors = TRUE)
# Peek at the first 10 rows of the data
head(x, n = 10L)
## Favorite.Color Favorite.Music.Genre Favorite.Beverage Favorite.Soft.Drink
## 1 Cool Rock Vodka 7UP/Sprite
## 2 Neutral Hip hop Vodka Coca Cola/Pepsi
## 3 Warm Rock Wine Coca Cola/Pepsi
## 4 Warm Folk/Traditional Whiskey Fanta
## 5 Cool Rock Vodka Coca Cola/Pepsi
## 6 Warm Jazz/Blues Doesn't drink Fanta
## 7 Cool Pop Beer Coca Cola/Pepsi
## 8 Warm Pop Whiskey Fanta
## 9 Warm Rock Other 7UP/Sprite
## 10 Neutral Pop Wine Coca Cola/Pepsi
## Gender
## 1 F
## 2 F
## 3 F
## 4 F
## 5 F
## 6 F
## 7 F
## 8 F
## 9 F
## 10 F
# Inspect the structure of the data frame, then a per-column summary
str(x)
## 'data.frame': 66 obs. of 5 variables:
## $ Favorite.Color : Factor w/ 3 levels "Cool","Neutral",..: 1 2 3 3 1 3 1 3 3 2 ...
## $ Favorite.Music.Genre: Factor w/ 7 levels "Electronic","Folk/Traditional",..: 7 3 7 2 7 4 5 5 7 5 ...
## $ Favorite.Beverage : Factor w/ 6 levels "Beer","Doesn't drink",..: 4 4 6 5 4 2 1 5 3 6 ...
## $ Favorite.Soft.Drink : Factor w/ 4 levels "7UP/Sprite","Coca Cola/Pepsi",..: 1 2 2 3 2 3 2 3 1 2 ...
## $ Gender : Factor w/ 2 levels "F","M": 1 1 1 1 1 1 1 1 1 1 ...
summary(x)
## Favorite.Color Favorite.Music.Genre Favorite.Beverage
## Cool :37 Electronic : 8 Beer :13
## Neutral: 7 Folk/Traditional: 4 Doesn't drink:14
## Warm :22 Hip hop : 8 Other :11
## Jazz/Blues : 4 Vodka : 9
## Pop :17 Whiskey : 9
## R&B and soul : 6 Wine :10
## Rock :19
## Favorite.Soft.Drink Gender
## 7UP/Sprite :13 F:33
## Coca Cola/Pepsi:32 M:33
## Fanta :14
## Other : 7
##
##
##
# Give the columns shorter names
names(x) # show the current column names
## [1] "Favorite.Color" "Favorite.Music.Genre" "Favorite.Beverage"
## [4] "Favorite.Soft.Drink" "Gender"
# Positional rename: the new names follow the column order printed above
names(x) <- c("color", "music", "beverage", "drink", "gender")
### Categorical data analysis ###
# Question 1: how many women and men are in the data?
table(x[["gender"]]) # counts per level
##
## F M
## 33 33
proportions(table(x[["gender"]])) # share of each level (fractions of 1)
##
## F M
## 0.5 0.5
# Question 2: which color / music / beverage / drink / gender is most popular?
# (pick one column to analyse; beverage is used here)
table(x[["beverage"]])
##
## Beer Doesn't drink Other Vodka Whiskey
## 13 14 11 9 9
## Wine
## 10
proportions(table(x[["beverage"]]))
##
## Beer Doesn't drink Other Vodka Whiskey
## 0.1969697 0.2121212 0.1666667 0.1363636 0.1363636
## Wine
## 0.1515152
# Question 3: do women and men differ in their favorite color?
# Cross-tabulation of counts: rows = gender, columns = color.
# NOTE: the variable name `t` is kept for compatibility with the rest of the
# script; it masks base::t() for plain variable lookup, which is why the
# transpose further down is written as base::t().
t <- table(x$gender, x$color)
t
##
## Cool Neutral Warm
## F 17 3 13
## M 20 4 9
# Row proportions: margin 1 divides each row by its own total, so every
# gender's row sums to 1
p.t1 <- prop.table(t, 1)
p.t1
##
## Cool Neutral Warm
## F 0.51515152 0.09090909 0.39393939
## M 0.60606061 0.12121212 0.27272727
# Convert the proportions to percentages (multiply by 100)
p.t2 <- p.t1 * 100
p.t2
##
## Cool Neutral Warm
## F 51.515152 9.090909 39.393939
## M 60.606061 12.121212 27.272727
# Round to 2 decimal places
p.t3 <- round(p.t2, 2)
p.t3
##
## Cool Neutral Warm
## F 51.52 9.09 39.39
## M 60.61 12.12 27.27
# Stacked bar chart.
# barplot() draws one bar per matrix COLUMN and stacks the rows inside it.
# The percentages are within-gender, so stacking F and M inside a color bar
# would add numbers with different denominators (bars totalling ~112, ~21, ~67).
# Transposing gives one bar per gender whose color segments sum to 100.
barplot(base::t(p.t3),
        xlim = c(0, 4), # leave empty space on the right for the legend
        legend.text = TRUE, # legend entries come from the row names (colors)
        args.legend = list(x = "right", bty = "n"),
        ylab = "Percent within gender")

# Grouped bar chart: F and M side by side for each color
barplot(p.t3, beside = TRUE, ylab = "Percent within gender")

# Add a legend and bar colors
rownames(p.t3)
## [1] "F" "M"
label <- rownames(p.t3)
label
## [1] "F" "M"
barplot(p.t3,
        beside = TRUE,
        legend.text = label,
        col = c("pink", "navy"), # one color per row of p.t3, i.e. F then M
        ylab = "Percent within gender")
