#資料(data)、值(values)、變數(variable)、物件(object)
# R 是以物件導向為主的程式語言, 在R中每一樣 “東西”, 都可視為 “物件”(object),
##一、R語言資料集合:
#1.向量(vector)
#2.清單(list)
#3.矩陣(matrix)
#4.陣列(array)
#5.因子(factor)
#6.資料框(dataframe)
# 資料分析基本上是 :產生資料物件、命名、使用函式對物件運算操作
# Build the vector data: five parallel vectors, one entry per hero.
# Position i in every vector refers to the same hero.
id <- c("001", "002", "003", "004", "005")
name <- c(
  "superman", "batman", "spiderman", "wolverine", "wonderwoman"
)
# The first four heroes are "Male", the last one is "Female".
gender <- c(rep("Male", 4), "Female")
height <- c(190, 188, 178, 150, 183)
weight <- c(102, 95, 76, 88, 59)
#二、R 的最基本物件是向量,
# 向量是由包含相同 “模式” 的元素 (element) 組成,
# 向量物件的基本元素的類型(basic mode) 主要分成
# 六大向量(vector)類型
# 1.數值向量(numeric):包含 "integer", 整數型向量 與 "double", 倍精確度型向量。
# 2.整數向量(integer)
# 3.文字向量(character)
# 4.邏輯值向量(logical)
# 5.日期向量(Date)
# 6.日期時間向量(POSIXct)
# Use class() to query the type of each of the five vectors.
# Lines starting with "##" record the console output of the call above them.
class(name)
## [1] "character"
class(height)
## [1] "numeric"
class(gender)
## [1] "character"
class(id)
## [1] "character"
class(weight)
## [1] "numeric"
# A comparison on a numeric vector yields a logical vector of the same length.
q <- height > 170
q
## [1] TRUE TRUE TRUE FALSE TRUE
class(q)
## [1] "logical"
# str() shows the structure of an object;
# summary() shows summary statistics of a variable.
str(name)
## chr [1:5] "superman" "batman" "spiderman" "wolverine" "wonderwoman"
str(height)
## num [1:5] 190 188 178 150 183
str(gender)
## chr [1:5] "Male" "Male" "Male" "Male" "Female"
str(id)
## chr [1:5] "001" "002" "003" "004" "005"
str(weight)
## num [1:5] 102 95 76 88 59
# For character vectors summary() only reports length, class and mode;
# for numeric vectors it reports the five-number summary plus the mean.
summary(name)
## Length Class Mode
## 5 character character
summary(height)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 150.0 178.0 183.0 177.8 188.0 190.0
summary(gender)
## Length Class Mode
## 5 character character
summary(id)
## Length Class Mode
## 5 character character
summary(weight)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 59 76 88 84 95 102
# Extract elements from a vector with [ ].
# Printing the bare name shows the whole vector.
name
## [1] "superman" "batman" "spiderman" "wolverine" "wonderwoman"
# A single positive index selects one element.
name[2]
## [1] "batman"
# A range of indices selects several consecutive elements.
name[3:5]
## [1] "spiderman" "wolverine" "wonderwoman"
# A negative index drops that position and keeps the rest.
name[-1]
## [1] "batman" "spiderman" "wolverine" "wonderwoman"
# Several positions can be dropped at once.
name[-c(1, 3, 5)]
## [1] "batman" "wolverine"
height
## [1] 190 188 178 150 183
# A logical condition inside [ ] keeps only the elements where it is TRUE.
height[height > 170]
## [1] 190 188 178 183
# Combine the vectors into a data frame; each vector becomes one column
# and keeps its own name as the column name.
heroes <- data.frame(
  id = id,
  name = name,
  height = height,
  gender = gender,
  weight = weight
)
# Print the data frame to the console.
heroes
## id name height gender weight
## 1 001 superman 190 Male 102
## 2 002 batman 188 Male 95
## 3 003 spiderman 178 Male 76
## 4 004 wolverine 150 Male 88
## 5 005 wonderwoman 183 Female 59
# Inspect the structure: number of rows, columns, and the type of each column.
str(heroes)
## 'data.frame': 5 obs. of 5 variables:
## $ id : chr "001" "002" "003" "004" ...
## $ name : chr "superman" "batman" "spiderman" "wolverine" ...
## $ height: num 190 188 178 150 183
## $ gender: chr "Male" "Male" "Male" "Male" ...
## $ weight: num 102 95 76 88 59
# Per-column summary of the whole data frame.
summary(heroes)
## id name height gender
## Length:5 Length:5 Min. :150.0 Length:5
## Class :character Class :character 1st Qu.:178.0 Class :character
## Mode :character Mode :character Median :183.0 Mode :character
## Mean :177.8
## 3rd Qu.:188.0
## Max. :190.0
## weight
## Min. : 59
## 1st Qu.: 76
## Median : 88
## Mean : 84
## 3rd Qu.: 95
## Max. :102
# Save the data frame in .RData format; the relative path means the file
# is written to the current working directory.
save(heroes, file = "heroes.RData")
# Remove the object from the workspace, then restore it from the .RData file.
# Without the rm() the load() below would only overwrite an object that is
# still present, so the round trip would not be demonstrated.
rm(heroes)
load("heroes.RData")
# Extract data from a data frame with [row, column].
# Leaving one side of the comma empty selects everything along that dimension.
heroes[2, ] # second row
## id name height gender weight
## 2 002 batman 188 Male 95
heroes[, 2] # second column
## [1] "superman" "batman" "spiderman" "wolverine" "wonderwoman"
heroes[2, 2] # second row, second column
## [1] "batman"
heroes[5, ]
## id name height gender weight
## 5 005 wonderwoman 183 Female 59
# There are two ways to extract a column by its name:
heroes[["name"]]
## [1] "superman" "batman" "spiderman" "wolverine" "wonderwoman"
# or
heroes$name
## [1] "superman" "batman" "spiderman" "wolverine" "wonderwoman"
# Compute the mean of the height column.
heroes$height
## [1] 190 188 178 150 183
mean(heroes$height)
## [1] 177.8
# Select rows by position: the first three rows, all columns.
heroes[1:3, ]
## id name height gender weight
## 1 001 superman 190 Male 102
## 2 002 batman 188 Male 95
## 3 003 spiderman 178 Male 76
# Select rows by condition: a logical vector in the row slot keeps the rows
# where the condition is TRUE.
heroes[heroes$name == "spiderman", ]
## id name height gender weight
## 3 003 spiderman 178 Male 76
# Every row except spiderman.
heroes[heroes$name != "spiderman", ]
## id name height gender weight
## 1 001 superman 190 Male 102
## 2 002 batman 188 Male 95
## 4 004 wolverine 150 Male 88
## 5 005 wonderwoman 183 Female 59
# Rows with height above 180.
heroes[heroes$height > 180, ]
## id name height gender weight
## 1 001 superman 190 Male 102
## 2 002 batman 188 Male 95
## 5 005 wonderwoman 183 Female 59
# Rows with weight below 80.
heroes[heroes$weight < 80, ]
## id name height gender weight
## 3 003 spiderman 178 Male 76
## 5 005 wonderwoman 183 Female 59
# Draw a bar chart of the heroes' heights.
heroes$height
## [1] 190 188 178 150 183
# Sort first, from largest to smallest (decreasing = TRUE).
sort(heroes$height, decreasing = TRUE)
## [1] 190 188 183 178 150
# Basic bar chart of the sorted heights (no labels).
barplot(sort(heroes$height, decreasing = TRUE))

# Labelled bar chart. sort() returns only the values, so labels typed by hand
# can drift out of step with the bars. Ordering whole rows with order() keeps
# each height paired with its own hero name.
by_height <- heroes[order(heroes$height, decreasing = TRUE), ]
hero_names <- as.character(by_height$name)
# Colours are keyed by hero name so each hero keeps the same colour
# regardless of the sort order.
hero_colors <- c(
  superman = "blue",
  batman = "black",
  spiderman = "red",
  wonderwoman = "pink",
  wolverine = "green"
)
barplot(by_height$height,
  main = "Heroes",
  sub = "Yarona",
  names.arg = hero_names,
  xlab = "name",
  ylab = "height",
  col = unname(hero_colors[hero_names])
)

# Four Asian Tigers
# https://zh.wikipedia.org/wiki/%E4%BA%9A%E6%B4%B2%E5%9B%9B%E5%B0%8F%E9%BE%99
nation <- c("Taiwan", "Korea", "Hong kong", "Singapore")
growth <- c(0.28, 0.36, 0.83, 1.40)
# NOTE(review): the variable name `data` masks utils::data() as a variable;
# the name is kept because other code may refer to it.
data <- data.frame(nation, growth)
# Growth values from largest to smallest.
sort(data$growth, decreasing = TRUE)
## [1] 1.40 0.83 0.36 0.28
# Basic bar chart of the sorted values (no labels).
barplot(sort(data$growth, decreasing = TRUE))

# Labelled bar chart. Whole rows are ordered by growth so the nation labels
# are taken from the data and always line up with their bars.
ranked <- data[order(data$growth, decreasing = TRUE), ]
barplot(ranked$growth,
  main = "Four Asian Tigers",
  sub = "Yarona",
  names.arg = as.character(ranked$nation),
  xlab = "Nations",
  ylab = "Natural Population Growth",
  col = c("blue", "black", "red", "pink")
)
