# 0318.R

#資料(data)、值(values)、變數(variable)、物件(object)

# R 是以物件導向為主的程式語言, 在R中每一樣 “東西”, 都可視為 “物件”(object), 
##一、R語言資料集合:
#1.向量(vector)
#2.清單(list)
#3.矩陣(matrix)
#4.陣列(array)
#5.因子(factor)
#6.資料框(dataframe)
# 資料分析基本上是 ：產生資料物件、命名、使用函式對物件運算操作


# Build the example vectors. Each vector holds one attribute for the same
# five heroes, so position i in every vector refers to the same hero.
id <- sprintf("%03d", 1:5) # zero-padded character ids "001" .. "005"
name <- c(
  "superman", "batman", "spiderman",
  "wolverine", "wonderwoman"
)
height <- c(190, 188, 178, 150, 183)
weight <- c(102, 95, 76, 88, 59)
gender <- c(rep("Male", 4), "Female")


#二、R 的最基本物件是向量, 
# 向量是由包含相同 “模式” 的元素 (element) 組成，
# 向量物件的基本元素的類型(basic mode) 主要分成
# 六大向量(vector)類型

# 1.數值向量（numeric）：包含 "integer", 整數型向量  與 "double", 倍精確度型向量。
# 2.整數向量（integer）
# 3.文字向量（character） 
# 4.邏輯值向量（logical）
# 5.日期向量（Date）
# 6.日期時間向量（POSIXct）


# Query the type of each vector with class().
# Every vector defined above is inspected exactly once.

class(name)

## [1] "character"

class(height)

## [1] "numeric"

class(gender)

## [1] "character"

class(id)

## [1] "character"

class(weight)

## [1] "numeric"

# Comparing a numeric vector against a scalar is element-wise and yields a
# logical vector of the same length.
q <- height > 170
q

## [1]  TRUE  TRUE  TRUE FALSE  TRUE

class(q)

## [1] "logical"

# str() shows the structure of an object.
# summary() shows summary statistics of a variable.
# Every vector defined above is inspected exactly once by each function.
str(name)

##  chr [1:5] "superman" "batman" "spiderman" "wolverine" "wonderwoman"

str(height)

##  num [1:5] 190 188 178 150 183

str(gender)

##  chr [1:5] "Male" "Male" "Male" "Male" "Female"

str(id)

##  chr [1:5] "001" "002" "003" "004" "005"

str(weight)

##  num [1:5] 102 95 76 88 59

# For character vectors summary() only reports length, class and mode.
summary(name)

##    Length     Class      Mode 
##         5 character character

# For numeric vectors summary() reports the five-number summary plus the mean.
summary(height)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   150.0   178.0   183.0   177.8   188.0   190.0

summary(gender)

##    Length     Class      Mode 
##         5 character character

summary(id)

##    Length     Class      Mode 
##         5 character character

summary(weight)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      59      76      88      84      95     102

# Extract elements from a vector with [ ].
name

## [1] "superman"    "batman"      "spiderman"   "wolverine"   "wonderwoman"

# A single positive index selects one element.
name[2]

## [1] "batman"

# A sequence of indices selects a run of consecutive elements.
name[seq(3, 5)]

## [1] "spiderman"   "wolverine"   "wonderwoman"

# A negative index drops that element and keeps the rest.
name[-1]

## [1] "batman"      "spiderman"   "wolverine"   "wonderwoman"

# Several negative indices drop several elements at once.
name[c(-1, -3, -5)]

## [1] "batman"    "wolverine"

height

## [1] 190 188 178 150 183

# A logical condition keeps only the elements for which it is TRUE.
height[170 < height]

## [1] 190 188 178 183

# Combine the vectors into a data frame, one column per vector.
heroes <- data.frame(
  id = id,
  name = name,
  height = height,
  gender = gender,
  weight = weight
)
# Show it in the console.
heroes

##    id        name height gender weight
## 1 001    superman    190   Male    102
## 2 002      batman    188   Male     95
## 3 003   spiderman    178   Male     76
## 4 004   wolverine    150   Male     88
## 5 005 wonderwoman    183 Female     59

# Inspect the structure: column names, column types and the first values.
str(heroes)

## 'data.frame':    5 obs. of  5 variables:
##  $ id    : chr  "001" "002" "003" "004" ...
##  $ name  : chr  "superman" "batman" "spiderman" "wolverine" ...
##  $ height: num  190 188 178 150 183
##  $ gender: chr  "Male" "Male" "Male" "Male" ...
##  $ weight: num  102 95 76 88 59

# Per-column summary.
summary(heroes)

##       id                name               height         gender         
##  Length:5           Length:5           Min.   :150.0   Length:5          
##  Class :character   Class :character   1st Qu.:178.0   Class :character  
##  Mode  :character   Mode  :character   Median :183.0   Mode  :character  
##                                        Mean   :177.8                     
##                                        3rd Qu.:188.0                     
##                                        Max.   :190.0                     
##      weight   
##  Min.   : 59  
##  1st Qu.: 76  
##  Median : 88  
##  Mean   : 84  
##  3rd Qu.: 95  
##  Max.   :102

# Save the data frame in .RData format (written to the working directory).
save(heroes, file = "heroes.RData")
# Remove the object, then read it back from the .RData file. Without the
# removal, load() would only overwrite an identical object and the round trip
# would not be demonstrated.
rm(heroes)
load("heroes.RData")

# Extract data from a data frame with [row, column].
heroes[2, ] # second row

##    id   name height gender weight
## 2 002 batman    188   Male     95

heroes[, 2] # second column

## [1] "superman"    "batman"      "spiderman"   "wolverine"   "wonderwoman"

heroes[2, 2] # value at second row, second column

## [1] "batman"

heroes[5, ]

##    id        name height gender weight
## 5 005 wonderwoman    183 Female     59

# Two ways to extract a column by its name:
heroes[["name"]]

## [1] "superman"    "batman"      "spiderman"   "wolverine"   "wonderwoman"

# or
heroes$name

## [1] "superman"    "batman"      "spiderman"   "wolverine"   "wonderwoman"

# Compute the mean height.
heroes$height

## [1] 190 188 178 150 183

mean(heroes$height)

## [1] 177.8

# First three rows.
heroes[seq_len(3), ]

##    id      name height gender weight
## 1 001  superman    190   Male    102
## 2 002    batman    188   Male     95
## 3 003 spiderman    178   Male     76

# Rows can be filtered with a logical condition in the row position.
heroes[heroes$name == "spiderman", ]

##    id      name height gender weight
## 3 003 spiderman    178   Male     76

heroes[heroes$name != "spiderman", ]

##    id        name height gender weight
## 1 001    superman    190   Male    102
## 2 002      batman    188   Male     95
## 4 004   wolverine    150   Male     88
## 5 005 wonderwoman    183 Female     59

heroes[heroes$height > 180, ]

##    id        name height gender weight
## 1 001    superman    190   Male    102
## 2 002      batman    188   Male     95
## 5 005 wonderwoman    183 Female     59

heroes[heroes$weight < 80, ]

##    id        name height gender weight
## 3 003   spiderman    178   Male     76
## 5 005 wonderwoman    183 Female     59

# Draw a bar chart of hero heights.
heroes$height

## [1] 190 188 178 150 183

# Sort first (largest to smallest, decreasing = TRUE).
sort(heroes$height, decreasing = TRUE)

## [1] 190 188 183 178 150

# Reorder whole rows rather than sorting the height column alone, so that
# every bar keeps the name that belongs to its height. Sorting only the
# heights and typing the labels by hand is how labels end up on the wrong bar.
heroes_by_height <- heroes[order(heroes$height, decreasing = TRUE), ]

# Plain bar chart.
barplot(heroes_by_height$height)

# One colour per hero, looked up by name so colours follow the bars
# regardless of the sort order.
hero_cols <- c(
  superman = "blue",
  batman = "black",
  spiderman = "red",
  wonderwoman = "pink",
  wolverine = "green"
)

# Annotated bar chart: title, subtitle, bar labels, axis labels, colours.
barplot(heroes_by_height$height,
        main = "Heros",
        sub = "Yarona",
        names.arg = as.character(heroes_by_height$name),
        xlab = "name",
        ylab = "height",
        col = unname(hero_cols[as.character(heroes_by_height$name)]))

# Four Asian Tigers
# https://zh.wikipedia.org/wiki/%E4%BA%9A%E6%B4%B2%E5%9B%9B%E5%B0%8F%E9%BE%99
nation <- c("Taiwan", "Korea", "Hong kong", "Singapore")
growth <- c(0.28, 0.36, 0.83, 1.40)
data <- data.frame(nation, growth)
sort(data$growth, decreasing = TRUE)

## [1] 1.40 0.83 0.36 0.28

# Reorder whole rows so each growth value stays paired with its nation; the
# bar labels are then taken from the data instead of being retyped by hand.
data_by_growth <- data[order(data$growth, decreasing = TRUE), ]

# Plain bar chart.
barplot(data_by_growth$growth)

# Annotated bar chart: title, subtitle, bar labels, axis labels, colours.
# Colours are assigned by bar position (largest growth first).
barplot(data_by_growth$growth,
        main = "Four Asian Tigers",
        sub = "Yarona",
        names.arg = as.character(data_by_growth$nation),
        xlab = "Nations",
        ylab = "Natural Population Growth",
        col = c("blue", "black", "red", "pink"))

# 0318.R

# rstudio-user

# 2022-03-18