##R語言資料分析期中考:
#輸入學號: 001 和名字:Shaq
##壹、自建資料與圖表(請見社團範例)
#(1-1)建立一個data frame,包括brand品牌與market_share市佔率
#(1-2)畫長條圖
# Smartphone brands and their market share values.
# NOTE(review): "Sansung" looks like a misspelling of "Samsung"; it is kept
# unchanged here because it is data -- confirm before correcting.
brand <- c("Sansung", "Apple", "oppo")
market_share <- c(31.7, 22.8, 9.3)
df <- data.frame(brand, market_share)
df
## brand market_share
## 1 Sansung 31.7
## 2 Apple 22.8
## 3 oppo 9.3
# Bar chart of market share per brand. The bar labels are read from the data
# frame (instead of being typed a second time) so they cannot drift from the
# data, and exactly one palette index is supplied per bar (11, 12, 13 for the
# three brands).
barplot(df$market_share,
        main = "Global Smartphone Market Share",
        sub = "by Peter Liu",
        names.arg = as.character(df$brand),
        xlab = "brand",
        ylab = "market_share",
        col = 10L + seq_len(nrow(df)))

#貳、比較gender 性別與lunch 營養午餐類型
# Load the external data set. Text columns are read as factors, which lets
# summary() report a count per level for them.
sp <- read.csv("StudentsPerformance.csv", stringsAsFactors = TRUE)
summary(sp)
## gender race.ethnicity parental.level.of.education lunch
## female:518 group A: 89 associate's degree:222 free/reduced:355
## male :482 group B:190 bachelor's degree :118 standard :645
## group C:319 high school :196
## group D:262 master's degree : 59
## group E:140 some college :226
## some high school :179
## test.preparation.course math.score reading.score writing.score
## completed:358 Min. : 0.00 Min. : 17.00 Min. : 10.00
## none :642 1st Qu.: 57.00 1st Qu.: 59.00 1st Qu.: 57.75
## Median : 66.00 Median : 70.00 Median : 69.00
## Mean : 66.09 Mean : 69.17 Mean : 68.05
## 3rd Qu.: 77.00 3rd Qu.: 79.00 3rd Qu.: 79.00
## Max. :100.00 Max. :100.00 Max. :100.00
#欄位名稱------------------------------------------
# gender 性別
# race.ethnicity 種族分群
# parental.level.of.education 父母教育程度
# lunch 營養午餐類型(free/reduced免費或減免餐費,standard為一般類別)
# test.preparation.course 考前準備課程(completed為已完成,none為未參加)
# math.score 數學成績
# reading.score 閱讀成績
# writing.score 寫作成績
# (2-1) Cross-tabulate gender against lunch type and express every cell as a
#       percentage.
# (2-2) Show the gender / lunch percentages as a grouped bar chart.
t1 <- table(sp$gender, sp$lunch)
t1
##
## free/reduced standard
## female 189 329
## male 166 316
# prop.table() is called without a margin, so each cell is divided by the
# grand total; the result is scaled to percent and rounded to 2 decimals.
p.t1 <- round(100 * prop.table(t1), 2)
p.t1
##
## free/reduced standard
## female 18.9 32.9
## male 16.6 31.6
# The row names of the table (the gender levels) are used as legend entries.
label <- rownames(p.t1)
barplot(
  height = p.t1,
  beside = TRUE,
  col = c("pink", "lightblue"),
  legend.text = label,
  main = "Gender/Lunch",
  sub = "By Peter Liu"
)

# Part 3: box plot and histogram of math.score.
boxplot(sp$math.score,
        col = "cyan",
        main = "Math Score boxplot",
        sub = "By Peter Liu")

# The plot title previously read "histgram"; the spelling is corrected.
hist(sp$math.score,
     col = "yellow",
     xlab = "Math Score",
     main = "Math Score histogram",
     sub = "By Peter Liu")

# Part 4: maximum, minimum, mean, median and standard deviation of math.score.
# Each expression is left at top level so its value is auto-printed.
max(sp[["math.score"]])    # maximum
## [1] 100
min(sp[["math.score"]])    # minimum
## [1] 0
mean(sp[["math.score"]])   # mean
## [1] 66.089
median(sp[["math.score"]]) # median
## [1] 66
sd(sp[["math.score"]])     # standard deviation
## [1] 15.16308
# Part 5: scatter plots for two pairs of scores, drawn side by side.
# (1) math.score against writing.score
# (2) reading.score against writing.score
# par() returns the previous settings; they are restored after plotting so the
# 1 x 2 layout does not leak into plots drawn later in the script.
old_par <- par(mfrow = c(1, 2))
# The writing column is spelled out in full: `sp$writing` only resolved to
# writing.score through partial matching of `$`.
plot(sp$math.score, sp$writing.score, col = "blue")
plot(sp$reading.score, sp$writing.score, col = "red")
par(old_par)

# Part 6: correlation coefficients for two pairs of scores.
# (1) math.score and writing.score
# (2) reading.score and writing.score
# The writing column is named in full rather than relying on partial matching
# of `sp$writing`; the computed values are unchanged.
cor(sp$math.score, sp$writing.score)
## [1] 0.802642
cor(sp$reading.score, sp$writing.score)
## [1] 0.9545981
# Part 7: mean math.score per gender, shown as a bar chart.
math <- tapply(sp$math.score, sp$gender, mean)
math
## female male
## 63.63320 68.72822
# Bars are ordered from the highest to the lowest mean. Colours are looked up
# by gender name, so each gender keeps its colour whatever the sort order
# turns out to be (previously the colours were tied to bar position).
math_sorted <- sort(math, decreasing = TRUE)
gender_cols <- c(female = "pink", male = "lightblue")
barplot(math_sorted,
        col = unname(gender_cols[names(math_sorted)]),
        xlab = "Gender",
        ylab = "Math Score",
        main = "Gender/Math Score",
        sub = "By Peter Liu")
