#上課程式碼:

#資料(data)、值(values)、變數(variable)、物件(object)

# R 是以物件導向為主的程式語言, 在R中每一樣 “東西”, 都可視為 “物件”(object), 
##一、R語言資料集合:
#1.向量(vector)
#2.清單(list)
#3.矩陣(matrix)
#4.陣列(array)
#5.因子(factor)
#6.資料框(dataframe)
# 資料分析基本上是 :產生資料物件、命名、使用函式對物件運算操作


# Build the example vectors. Each holds one value per hero, and the five
# vectors line up by position.
# Zero-padded ids kept as text: "001" "002" "003" "004" "005".
id <- sprintf("%03d", 1:5)
name <- c("superman", "batman", "spiderman", "wolverine", "wonderwoman")
# Units are not stated in this file; presumably centimetres -- TODO confirm.
height <- c(190, 188, 178, 150, 183)
# Presumably kilograms -- TODO confirm.
weight <- c(102, 95, 76, 88, 59)
# Four "Male" entries followed by one "Female".
gender <- rep(c("Male", "Female"), times = c(4, 1))


#二、R 的最基本物件是向量, 
# 向量是由包含相同 “模式” 的元素 (element) 組成,
# 向量物件的基本元素的類型(basic mode) 主要分成
# 六大向量(vector)類型

# 1.數值向量(numeric):包含 "integer", 整數型向量  與 "double", 倍精確度型向量。
# 2.整數向量(integer)
# 3.文字向量(character) 
# 4.邏輯值向量(logical)
# 5.日期向量(Date)
# 6.日期時間向量(POSIXct)


# Use class() to query the type of each vector. The lines starting with "##"
# are the console output recorded for the expression just above them.

class(name)
## [1] "character"
class(height)
## [1] "numeric"
class(gender)
## [1] "character"
class(id)   
## [1] "character"
class(height)
## [1] "numeric"
# Comparing a numeric vector with a single number works element by element
# and gives one TRUE/FALSE per element.
q <- height > 170
q
## [1]  TRUE  TRUE  TRUE FALSE  TRUE
class(q)
## [1] "logical"
# str() shows the structure of an object.
# summary() shows summary statistics for each variable.




# Extract elements from a vector with [ ]. The lines starting with "##" are
# the console output recorded for the expression just above them.
name
## [1] "superman"    "batman"      "spiderman"   "wolverine"   "wonderwoman"
# A single positive index selects that one element (positions start at 1).
name[2]
## [1] "batman"
# A range of positions selects several consecutive elements.
name[3:5]
## [1] "spiderman"   "wolverine"   "wonderwoman"
# A negative index drops that position and keeps everything else.
name[-1]
## [1] "batman"      "spiderman"   "wolverine"   "wonderwoman"
# Negating a vector of positions drops all of them at once.
name[-c(1,3,5)]
## [1] "batman"    "wolverine"
height
## [1] 190 188 178 150 183
# A logical condition inside [ ] keeps only the elements where it is TRUE.
height[height > 170]
## [1] 190 188 178 183
# Combine the vectors into a data frame, one column per vector.
# stringsAsFactors is set explicitly because the outputs recorded in this
# script show the text columns (id, name, gender) as factors. data.frame()
# only converts strings to factors by default before R 4.0, so pinning the
# argument makes the script print the same results on every R version.
heroes <- data.frame(
  id, name, height, weight, gender,
  stringsAsFactors = TRUE
)
heroes
##    id        name height weight gender
## 1 001    superman    190    102   Male
## 2 002      batman    188     95   Male
## 3 003   spiderman    178     76   Male
## 4 004   wolverine    150     88   Male
## 5 005 wonderwoman    183     59 Female
# The stand-alone vector is still character; only the copy stored in the
# data frame was converted to a factor.
str(id)
##  chr [1:5] "001" "002" "003" "004" "005"
# str() lists every column with its type and first values.
str(heroes)
## 'data.frame':    5 obs. of  5 variables:
##  $ id    : Factor w/ 5 levels "001","002","003",..: 1 2 3 4 5
##  $ name  : Factor w/ 5 levels "batman","spiderman",..: 3 1 2 4 5
##  $ height: num  190 188 178 150 183
##  $ weight: num  102 95 76 88 59
##  $ gender: Factor w/ 2 levels "Female","Male": 2 2 2 2 1
# summary() gives level counts for factor columns and min/quartiles/mean/max
# for numeric columns.
summary(heroes)
##    id             name       height          weight       gender 
##  001:1   batman     :1   Min.   :150.0   Min.   : 59   Female:1  
##  002:1   spiderman  :1   1st Qu.:178.0   1st Qu.: 76   Male  :4  
##  003:1   superman   :1   Median :183.0   Median : 88             
##  004:1   wolverine  :1   Mean   :177.8   Mean   : 84             
##  005:1   wonderwoman:1   3rd Qu.:188.0   3rd Qu.: 95             
##                          Max.   :190.0   Max.   :102
# Save the data frame in .RData format. The relative path means the file is
# written to the current working directory.
save(heroes, file = "heroes.RData")
# Remove the object from the workspace, then read the .RData file back.
# load() recreates the object under the name it was saved with. Without the
# rm() the reload could not be told apart from the object that was already
# in memory.
rm(heroes)
load("heroes.RData")
# Confirm the reload actually restored the object.
stopifnot(exists("heroes"))

# Extract data from a data frame with [ ], written as [row, column].
# The lines starting with "##" are the console output recorded for the
# expression just above them.
# NOTE: the "Levels:" lines in the recorded output appear because the text
# columns are factors in the recorded session (see the str(heroes) output);
# if those columns are plain character vectors the values print quoted and
# without a "Levels:" line.
# Row 2, all columns.
heroes[2, ]
##    id   name height weight gender
## 2 002 batman    188     95   Male
# Column 2, all rows (returned as a vector, not a data frame).
heroes[,2]
## [1] superman    batman      spiderman   wolverine   wonderwoman
## Levels: batman spiderman superman wolverine wonderwoman
# The single value at row 2, column 2.
heroes[2, 2]
## [1] batman
## Levels: batman spiderman superman wolverine wonderwoman
heroes[5,]
##    id        name height weight gender
## 5 005 wonderwoman    183     59 Female
# Two ways to extract one column by its name:
heroes[["name"]]
## [1] superman    batman      spiderman   wolverine   wonderwoman
## Levels: batman spiderman superman wolverine wonderwoman
## or
heroes$name
## [1] superman    batman      spiderman   wolverine   wonderwoman
## Levels: batman spiderman superman wolverine wonderwoman
heroes$height
## [1] 190 188 178 150 183
# An extracted column is an ordinary vector, so it can be passed to functions.
mean(heroes$height)
## [1] 177.8
# Rows 1 to 3.
heroes[1:3,]
##    id      name height weight gender
## 1 001  superman    190    102   Male
## 2 002    batman    188     95   Male
## 3 003 spiderman    178     76   Male
# A logical condition in the row position keeps the rows where it is TRUE.
heroes[heroes$name=="spiderman",]
##    id      name height weight gender
## 3 003 spiderman    178     76   Male
# != keeps every row except the matching one.
heroes[heroes$name!="spiderman",]
##    id        name height weight gender
## 1 001    superman    190    102   Male
## 2 002      batman    188     95   Male
## 4 004   wolverine    150     88   Male
## 5 005 wonderwoman    183     59 Female
# Rows whose height is greater than 180.
heroes[heroes$height>180,]
##    id        name height weight gender
## 1 001    superman    190    102   Male
## 2 002      batman    188     95   Male
## 5 005 wonderwoman    183     59 Female
# Bar chart of hero heights, tallest first.
# The rows are reordered with order() so that every bar keeps its own hero's
# name. Sorting only the heights while passing the names in their original
# row order puts the wrong label under the 183, 178 and 150 bars.
heroes_by_height <- heroes[order(heroes$height, decreasing = TRUE), ]
barplot(
  heroes_by_height$height,
  main = "Heroes",
  sub = "by Peter Liu",
  # as.character() so the labels are the names whether the column is a
  # factor or a character vector.
  names.arg = as.character(heroes_by_height$name),
  xlab = "name",
  ylab = "height",
  col = 6:10
)

# Four Asian Tigers
# https://zh.wikipedia.org/wiki/%E4%BA%9A%E6%B4%B2%E5%9B%9B%E5%B0%8F%E9%BE%99
nation <- c("Korea", "Taiwan", "Hong kong", "Singapore")
growth <- c(0.36, 0.28, 0.83, 1.40)
# NOTE: `data` is also the name of a built-in R function (utils::data). The
# name is kept here so that any later code referring to it keeps working.
data <- data.frame(nation, growth)
# Rank the rows by growth, highest first, and take both the bar heights and
# the bar labels from the same ranked rows. This keeps each label attached to
# its own value instead of relying on a hand-typed list of names that has to
# be kept in sync with the sorted numbers.
tigers_ranked <- data[order(data$growth, decreasing = TRUE), ]
barplot(
  tigers_ranked$growth,
  main = "Four Asian Tigers",
  sub = "by Peter Liu",
  names.arg = as.character(tigers_ranked$nation),
  xlab = "Nations",
  ylab = "Natural Population Growth",
  col = 11:14
)