這部分來探討R最基礎的資料結構- Atomic Vector ,再討論以原子向量為延伸,加入 dim 屬性的 Array 。接著討論屬於異質資料的 List 與 Data Frame 。
原子向量(atomic vector)又稱為 「同質性向量(homogeneous)」
v <- c(TRUE, FALSE)
class(v)
## [1] "logical"
v
## [1] TRUE FALSE
v <- c(TRUE, FALSE, 100L)
class(v)
## [1] "integer"
v
## [1] 1 0 100
v <- c(TRUE, FALSE, 100L, 100)
class(v)
## [1] "numeric"
v
## [1] 1 0 100 100
v <- c(TRUE, FALSE, 100L, 100, 100 + 0i)
class(v)
## [1] "complex"
v
## [1] 1+0i 0+0i 100+0i 100+0i 100+0i
v <- c(TRUE, FALSE, 100L, 100, "100")
class(v)
## [1] "character"
v
## [1] "TRUE" "FALSE" "100" "100" "100"
?vector # 查詢vector函數
vector() # 空向量(empty vector)
## logical(0)
vector(mode = "logical", length = 5) # 各個不同類別型態的向量初始化
## [1] FALSE FALSE FALSE FALSE FALSE
vector(mode = "numeric", length = 5) # 注意向量元素的內容
## [1] 0 0 0 0 0
vector(mode = "integer", length = 5)
## [1] 0 0 0 0 0
vector(mode = "character", length = 5)
## [1] "" "" "" "" ""
vector(mode = "complex", length = 5)
## [1] 0+0i 0+0i 0+0i 0+0i 0+0i
# NOT RUN
# 1x <- 100
# 錯誤: unexpected symbol in "1x"
# _x <- 100
# 錯誤: unexpected input in "_"
# x y <- 100
# 錯誤: unexpected symbol in "x y"
另外,如用dot開頭(ex:.xx)命名時,變數會被創建,但物件名不會出現在Global Environment中。不過,將 ls() 的參數 all.names 設定為 TRUE 即可看見。
.x <- 100
ls() # 看不到.x
## [1] "v"
ls(all.names = TRUE) # 可看見.x
## [1] ".x" "v"
如要打破上述命名規則,可使用成對『``』將名字放在其中:
`1x` <- 100
`1x`
## [1] 100
`_x` <- 100
`_x`
## [1] 100
`:)` <- 100
`:)`
## [1] 100
`x y` <- 100
`x y`
## [1] 100
常見的R指令(如二元運算子),背後也都有一個函數作支援:
10 > 2
## [1] TRUE
`>`(10, 2) # 指令與上行相同
## [1] TRUE
x <- 100
x
## [1] 100
`<-`(x, 100) # 指令與上行相同
x
## [1] 100
dim(x = 1:10) ## NULL: 代表『不存在』意義的一個R物件
## NULL
dim(x = vector()) # 即使空向量也是
## NULL
class(x = rnorm) # function物件
## [1] "function"
100 # 數字100
## [1] 100
class(x = 100) # 查詢物件的類別型態
## [1] "numeric"
rnorm(n = 10, mean = 5, sd = 2) # 產生常態分配的亂數
## [1] 9.4875071 0.8865933 7.5264319 5.8365288 3.8103149 7.2210596 6.3525279
## [8] 5.8351004 7.2734849 4.3776636
1e-3
## [1] 0.001
1E-3 # 大寫E與小寫e皆可
## [1] 0.001
class(x = 2.78e-3)
## [1] "numeric"
x <- 1:10
length(x = x)
## [1] 10
dim(x = x)
## NULL
class(x = x)
## [1] "integer"
1L
## [1] 1
class(x = 1L)
## [1] "integer"
1:10; 10:1 # 加;號,簡短程式可寫在同一行
## [1] 1 2 3 4 5 6 7 8 9 10
## [1] 10 9 8 7 6 5 4 3 2 1
TRUE
## [1] TRUE
FALSE
## [1] FALSE
c(T, T, F, F) # 縮寫
## [1] TRUE TRUE FALSE FALSE
"A"
## [1] "A"
'A'
## [1] "A"
c("A", "B", "C")
## [1] "A" "B" "C"
"" # 空字串
## [1] ""
# 常用的字元字串向量
letters
## [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o" "p" "q" "r" "s"
## [20] "t" "u" "v" "w" "x" "y" "z"
LETTERS
## [1] "A" "B" "C" "D" "E" "F" "G" "H" "I" "J" "K" "L" "M" "N" "O" "P" "Q" "R" "S"
## [20] "T" "U" "V" "W" "X" "Y" "Z"
month.abb
## [1] "Jan" "Feb" "Mar" "Apr" "May" "Jun" "Jul" "Aug" "Sep" "Oct" "Nov" "Dec"
month.name
## [1] "January" "February" "March" "April" "May" "June"
## [7] "July" "August" "September" "October" "November" "December"
clx1 <- 1 + 1i
class(clx1)
## [1] "complex"
clx2 <- 1 + 0i
class(clx2)
## [1] "complex"
# 1 + 1*i
# not run
\(\sqrt{-1}\) :
sqrt(-1 + 0i) # 可得虛數i
## [1] 0+1i
NA 為 not available 的縮寫,代表『遺失值』意義。
class(NA)
## [1] "logical"
# Typed missing-value constants: each NA_*_ literal has its own atomic type.
# NA_real_ is the double variant, whose class is reported as "numeric".
class(NA_character_); class(NA_complex_); class(NA_integer_); class(NA_real_)
## [1] "character"
## [1] "complex"
## [1] "integer"
## [1] "numeric"
length(NA)
## [1] 1
x <- c(1, 2, NA, 4, NA)
x == NA
## [1] NA NA NA NA NA
# 判斷是否為NA,請用is.na()
is.na(x)
## [1] FALSE FALSE TRUE FALSE TRUE
NULL 為代表『不存在』意義的特殊物件。
class(NULL)
## [1] "NULL"
length(NULL)
## [1] 0
NULL == NULL
## logical(0)
NULL == NA
## logical(0)
NULL == 0
## logical(0)
NULL == NULL
## logical(0)
is.null(NULL)
## [1] TRUE
class(Inf)
## [1] "numeric"
print(1/0)
## [1] Inf
print(log(0))
## [1] -Inf
1 + 1/0
## [1] Inf
Inf == Inf
## [1] TRUE
Inf == -Inf
## [1] FALSE
is.infinite(-Inf)
## [1] TRUE
is.infinite(Inf)
## [1] TRUE
is.finite(Inf)
## [1] FALSE
x <- 1:5
x
## [1] 1 2 3 4 5
names(x)
## NULL
names(x) <- c("A", "B", "C", "D", "E")
x
## A B C D E
## 1 2 3 4 5
由之前的範例可知,R的向量不具有維度(dim)屬性
dim(x)
## NULL
值得注意的是,長度(length())並不算屬性。
`[` 運算子:
v <- c(1, 4, 5, 2, 8)
length(v)
## [1] 5
v[1]
## [1] 1
v[3]
## [1] 5
v[1:3]
## [1] 1 4 5
# v[1,3, 3]
v[c(1, 3, 5)]
## [1] 1 5 8
v[7]
## [1] NA
v[4:8]
## [1] 2 8 NA NA NA
v <- c(1, 4, 5, 2, 8)
names(v) <- letters[1:5]
v
## a b c d e
## 1 4 5 2 8
v["a"]
## a
## 1
v[c("a", "c", "e")]
## a c e
## 1 5 8
v <- 1:5
length(v)
## [1] 5
v[-1]
## [1] 2 3 4 5
v[-c(1, 3, 5)]
## [1] 2 4
v[-(1:3)]
## [1] 4 5
v[-8] # 欲刪除之index不存在
## [1] 1 2 3 4 5
v <- 1:5
# Logical subsetting: keep the positions flagged TRUE.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned, whereas TRUE and FALSE are reserved words.
v[c(TRUE, TRUE, FALSE, FALSE, TRUE)]
## [1] 1 2 5
v <- 1:5
v[NULL]
## integer(0)
v <- 1:5
v[]
## [1] 1 2 3 4 5
v <- 1:4
v[0]
## integer(0)
w <- letters[1:5]
w[0]
## character(0)
v <- 1:5
v[NA]
## [1] NA NA NA NA NA
# v <- 1:4
# v[c(1, -3, 4)]
# Error in v[c(1, -3, 4)] : 只有負數下標中才能有 0
v <- 1:5
v[c(1, 2, NA, 4)]
## [1] 1 2 NA 4
v <- 1:5
v[c(-1, -2, 0, -4)]
## [1] 3 5
v <- 1:5
v[c(0, 1, 2)]
## [1] 1 2
v[c(1, 0, 2)]
## [1] 1 2
v[c(1, 2, 0)]
## [1] 1 2
v <- 1:5
v[c(0, -1, -2)]
## [1] 3 4 5
v <- 1:5
v[NULL]
## integer(0)
v[c(NULL, 1, 3)]
## [1] 1 3
# v[c(-1, -2, NA)]
# Error in v[c(-1, -2, NA)] : 只有負數下標中才能有 0
set.seed(seed = 1000) # 設定亂數種子
d <- rnorm(n = 100, mean = 10, sd = 5)
head(d) # 觀察前6筆資料
## [1] 7.771109 3.970717 10.205632 13.196942 6.067228 8.072554
tail(d) # 觀察後6筆資料
## [1] 9.478942 12.339197 12.219604 14.142764 8.064749 20.094691
tail(d, n = 10) # 可透過參數n設定,觀察更多筆資料
## [1] 19.857662 0.395024 12.310630 9.196380 9.478942 12.339197 12.219604
## [8] 14.142764 8.064749 20.094691
names(d)
## NULL
names(d) <- sample(x = c(letters, 1:9), size = 100, replace = TRUE) # 設定names屬性
透過R集合(set)相關的函數操作,可將向量視為集合,如集合的交集、聯集與差集等。
?setdiff # 查詢相關集合函數
setequal(x = c(1, 1, 2, 3, 3, 3), y = c(1, 3, 2)) # 集合的比較
## [1] TRUE
x <- names(d) # 抓出每個樣本點的名稱
x # 注意:名稱會有重複的現象。
## [1] "6" "t" "w" "1" "w" "b" "p" "r" "e" "7" "p" "f" "2" "h" "s" "x" "5" "g"
## [19] "y" "3" "9" "t" "s" "r" "e" "f" "u" "2" "t" "d" "9" "e" "9" "w" "a" "a"
## [37] "m" "v" "2" "u" "i" "8" "h" "a" "a" "b" "a" "7" "9" "u" "g" "z" "8" "z"
## [55] "s" "1" "o" "h" "s" "o" "v" "h" "v" "f" "z" "x" "7" "n" "c" "d" "7" "7"
## [73] "e" "5" "p" "y" "y" "v" "d" "o" "r" "r" "p" "z" "c" "e" "r" "g" "m" "6"
## [91] "i" "c" "f" "6" "q" "t" "2" "o" "l" "n"
setdiff(x = x, y = as.character(1:9)) # 以集合的方式扣除掉數字名後得出所有英文字母的名稱,不會有重複
## [1] "t" "w" "b" "p" "r" "e" "f" "h" "s" "x" "g" "y" "u" "d" "a" "m" "v" "i" "z"
## [20] "o" "n" "c" "q" "l"
d[setdiff(x = x, y = as.character(1:9))]
## t w b p r e f
## 3.9707172 10.2056316 8.0725535 7.6206606 13.5987535 9.9074719 7.2275565
## h s x g y u d
## 9.3956384 3.3197948 10.8502874 10.1246593 -0.2329271 1.0807793 16.1046783
## a m v i z o n
## 1.1690069 8.1690966 15.2880059 7.4134678 4.5265315 1.0483853 18.0960436
## c q l
## 5.2153616 9.4789423 8.0647494
注意:以上做法會有問題。 因名稱會有重複的現象,所以當你用character vector subsetting的方式按名稱取值,只會抓出具相同名稱的樣本點中第一個值,因此在上述結果只有看各名稱下唯一值的數字。 建議:雖然R允許取重複名稱,但我們不建議。
nms <- setdiff(x = x, y = as.character(1:9)) # 這是我們要抓的名單
# 用is.element()一個個去檢查每一個樣本點是否在我們的名單(set)中(採logical vector subsetting)
d[is.element(el = names(d), set = nms)]
## t w w b p r e
## 3.9707172 10.2056316 6.0672282 8.0725535 7.6206606 13.5987535 9.9074719
## p f h s x g y
## 5.0878609 7.2275565 9.3956384 3.3197948 10.8502874 10.1246593 -0.2329271
## t s r e f u t
## 3.8649200 14.1712367 12.6628587 6.7658752 13.0158063 1.0807793 12.8048786
## d e w a a m v
## 16.1046783 13.4971476 7.6742453 1.1690069 10.9464430 8.1690966 15.2880059
## u i h a a b a
## 3.2582047 7.4134678 10.9273251 9.7815428 8.9204331 17.3188767 11.1483332
## u g z z s o h
## 5.1590856 11.2585569 4.5265315 5.0184900 10.5028901 1.0483853 11.5585061
## s o v h v f z
## 22.7699400 5.6958112 12.7196422 8.0383098 16.1772095 15.9804322 7.5212655
## x n c d e p y
## 8.5282939 18.0960436 5.2153616 10.2061856 11.4272881 9.2032820 7.6954055
## y v d o r r p
## 10.8421904 16.9774651 13.6421313 11.6754497 15.8463825 11.2398341 8.2092526
## z c e r g m i
## 16.9174666 12.0603458 9.3849607 9.6688534 -1.6124544 4.7717175 19.8576619
## c f q t o l n
## 0.3950240 12.3106304 9.4789423 12.3391970 14.1427640 8.0647494 20.0946908
d[is.element(el = names(d), set = c("A", "B", "100"))] # 會取出空向量
## named numeric(0)
d[is.element(el = names(d), set = c(nms, "1", "2", "3"))] # 名單添加1號, 2號, 3號
## t w 1 w b p r
## 3.9707172 10.2056316 13.1969420 6.0672282 8.0725535 7.6206606 13.5987535
## e p f 2 h s x
## 9.9074719 5.0878609 7.2275565 10.6069059 9.3956384 3.3197948 10.8502874
## g y 3 t s r e
## 10.1246593 -0.2329271 11.0657705 3.8649200 14.1712367 12.6628587 6.7658752
## f u 2 t d e w
## 13.0158063 1.0807793 11.6747108 12.8048786 16.1046783 13.4971476 7.6742453
## a a m v 2 u i
## 1.1690069 10.9464430 8.1690966 15.2880059 6.2918927 3.2582047 7.4134678
## h a a b a u g
## 10.9273251 9.7815428 8.9204331 17.3188767 11.1483332 5.1590856 11.2585569
## z z s 1 o h s
## 4.5265315 5.0184900 10.5028901 14.7684014 1.0483853 11.5585061 22.7699400
## o v h v f z x
## 5.6958112 12.7196422 8.0383098 16.1772095 15.9804322 7.5212655 8.5282939
## n c d e p y y
## 18.0960436 5.2153616 10.2061856 11.4272881 9.2032820 7.6954055 10.8421904
## v d o r r p z
## 16.9774651 13.6421313 11.6754497 15.8463825 11.2398341 8.2092526 16.9174666
## c e r g m i c
## 12.0603458 9.3849607 9.6688534 -1.6124544 4.7717175 19.8576619 0.3950240
## f q t 2 o l n
## 12.3106304 9.4789423 12.3391970 12.2196041 14.1427640 8.0647494 20.0946908
當Vector擁有維度(dimension)之後,其類別型態變為 『陣列(Array)』。當其維度為『1維』時,其為『一維陣列』。維度為『2維』時,其 特稱 為 『矩陣(Matrix)』 ,其他情況,如『3維以上』亦皆為『陣列』。
因為陣列(含矩陣)其來源為同質性的Atomic Vector,故陣列(含矩陣)亦為 同質性 的資料型態。
v <- 1:30
dim(v)
## NULL
class(v)
## [1] "integer"
dim(v) <- 30
class(v) # 為『1維陣列』
## [1] "array"
v
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
## [26] 26 27 28 29 30
dim(v) <- c(5, 6)
class(v) # 為矩陣
## [1] "matrix"
v
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] 1 6 11 16 21 26
## [2,] 2 7 12 17 22 27
## [3,] 3 8 13 18 23 28
## [4,] 4 9 14 19 24 29
## [5,] 5 10 15 20 25 30
dim(v) <- c(5, 3, 2)
class(v) # 3維陣列
## [1] "array"
dim(v) <- NULL # 失去維度後,會變為原來同質性的integer vector
v
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
## [26] 26 27 28 29 30
v1 <- 1:20
dim(v1) <- c(5, 4)
v1
## [,1] [,2] [,3] [,4]
## [1,] 1 6 11 16
## [2,] 2 7 12 17
## [3,] 3 8 13 18
## [4,] 4 9 14 19
## [5,] 5 10 15 20
v2 <- matrix(data = 1:20, nrow = 5, ncol = 4) # filled column by column (no byrow argument given)
v2 # v1 and v2 have identical contents but were built in different ways
## [,1] [,2] [,3] [,4]
## [1,] 1 6 11 16
## [2,] 2 7 12 17
## [3,] 3 8 13 18
## [4,] 4 9 14 19
## [5,] 5 10 15 20
v3 <- matrix(data = 1:20, nrow = 5, ncol = 4, byrow = TRUE) # NOT by column-order
v3
## [,1] [,2] [,3] [,4]
## [1,] 1 2 3 4
## [2,] 5 6 7 8
## [3,] 9 10 11 12
## [4,] 13 14 15 16
## [5,] 17 18 19 20
值得注意的是,針對一個矩陣求取長度(length):
length(v3) # 回傳向量的長度
## [1] 20
由此可知,向量與矩陣的差別在於是否具有維度屬性。
List 為 『異質性(heterogeneous)向量』,亦不具『維度』。
# ?list
l1 <- list(TRUE, 1L, 1, "1", list(100, 200), mean, median, sd)
class(l1)
## [1] "list"
length(l1)
## [1] 8
dim(l1)
## NULL
print(l1)
## [[1]]
## [1] TRUE
##
## [[2]]
## [1] 1
##
## [[3]]
## [1] 1
##
## [[4]]
## [1] "1"
##
## [[5]]
## [[5]][[1]]
## [1] 100
##
## [[5]][[2]]
## [1] 200
##
##
## [[6]]
## function (x, ...)
## UseMethod("mean")
## <bytecode: 0x7fc9e1bce830>
## <environment: namespace:base>
##
## [[7]]
## function (x, na.rm = FALSE, ...)
## UseMethod("median")
## <bytecode: 0x7fc9de7914f8>
## <environment: namespace:stats>
##
## [[8]]
## function (x, na.rm = FALSE)
## sqrt(var(if (is.vector(x) || is.factor(x)) x else as.double(x),
## na.rm = na.rm))
## <bytecode: 0x7fc9e1a60d80>
## <environment: namespace:stats>
[回傳 (子清單)sub-list 。
l <- list(TRUE, 1L, 1, "1", list(100, 200), matrix(1:20, nrow = 5, ncol = 4), mean)
l[5]
## [[1]]
## [[1]][[1]]
## [1] 100
##
## [[1]][[2]]
## [1] 200
class(l[6])
## [1] "list"
[[回傳list中元素的內容物,內容物是什麼,提領出來就是什麼。
l <- list(TRUE, 1L, 1, "1", list(100, 200), matrix(1:20, nrow = 5, ncol = 4), mean)
l[6]
## [[1]]
## [,1] [,2] [,3] [,4]
## [1,] 1 6 11 16
## [2,] 2 7 12 17
## [3,] 3 8 13 18
## [4,] 4 9 14 19
## [5,] 5 10 15 20
class(l[[6]])
## [1] "matrix"
l[[7]]
## function (x, ...)
## UseMethod("mean")
## <bytecode: 0x7fc9e1bce830>
## <environment: namespace:base>
class(l[[7]])
## [1] "function"
l[[7]](1:20)
## [1] 10.5
當list具備 『names』 屬性時即可使用$。功能類似[[,但不完全相同。且在互動模式下使用便利。
l <- list(A = 100, B = 200, E = 300)
l[["A"]]
## [1] 100
l$A
## [1] 100
l <- list(A = 100, B = 200, E = 300)
x <- "A"
l[[x]]
## [1] 100
l$x
## NULL
因為R會將程式 l$x 視為 l[["x"]](以字面名稱 "x" 查找),而非 l[[x]](以變數 x 的值 "A" 查找)。因 l 內並無元素命名為 x ,故回傳NULL。
# ?data.frame
df <- data.frame(A = 10:1, B = rnorm(n = 10), C = runif(n = 10))
df
## A B C
## 1 10 -0.3615950 0.82076792
## 2 9 -1.1619680 0.20936290
## 3 8 -0.7114164 0.76743166
## 4 7 0.3489709 0.67099722
## 5 6 0.4273664 0.04625896
## 6 5 1.6608471 0.75360079
## 7 4 0.5816135 0.40778331
## 8 3 0.1434655 0.37252915
## 9 2 0.3433031 0.25478193
## 10 1 -0.9314912 0.39000806
class(df)
## [1] "data.frame"
typeof(df) # data frame的內部型態本質為list
## [1] "list"
dim(df) # 與矩陣一樣具有二維維度
## [1] 10 3
dim(df)
## [1] 10 3
dimnames(df)
## [[1]]
## [1] "1" "2" "3" "4" "5" "6" "7" "8" "9" "10"
##
## [[2]]
## [1] "A" "B" "C"
colnames(df)
## [1] "A" "B" "C"
rownames(df)
## [1] "1" "2" "3" "4" "5" "6" "7" "8" "9" "10"
df[, "A"]
## [1] 10 9 8 7 6 5 4 3 2 1
df[2, ]
## A B C
## 2 9 -1.161968 0.2093629
df["2", ]
## A B C
## 2 9 -1.161968 0.2093629
df[8, 1]
## [1] 3
df["8", 1]
## [1] 3
df["8", "A"]
## [1] 3
df[8, "A"]
## [1] 3
length(df)
## [1] 3
names(df)
## [1] "A" "B" "C"
df$A
## [1] 10 9 8 7 6 5 4 3 2 1
df$A[8]
## [1] 3
df[["A"]][8]
## [1] 3
df[[c(1, 8)]] # recursive subsetting: extraction happens in two levels; first take element 1 (column A), then take the 8th value inside it
## [1] 3
1e-3
## 0.001
1E-3 # 大寫E也可以
## 0.001
type(1e3)
## <class 'float'>
import math
type(math.e ** 2 )
## <class 'float'>
type(100)
## <class 'int'>
type(100.)
## <class 'float'>
type(100.0)
## <class 'float'>
x = 100
x
## 100
type(x)
## <class 'int'>
True
## True
type(False)
## <class 'bool'>
float(True)
## 1.0
float(False)
## 0.0
int(True)
## 1
int(False)
## 0
True is 1
## False
True == 1 # 因型別轉換
## True
0 == False
## True
0.0 == False
## True
0 + 0j == False
## True
[] == False
## False
() == False
## False
{} == False
## False
None == False
## False
not []
## True
not ()
## True
not {}
## True
not None
## True
# help(str)
'This is a string'
## 'This is a string'
"This is a string"
## 'This is a string'
'''This is a string'''
## 'This is a string'
"""This is a string"""
## 'This is a string'
反斜線(\)可用於『脫逸字元(escape character)』,可賦予特殊意義。
x = "\tThis string starts with a \"tab\"."
x
## '\tThis string starts with a "tab".'
print(x)
## This string starts with a "tab".
x = "This string contains a single backslash(\\)."
x
## 'This string contains a single backslash(\\).'
print(x)
## This string contains a single backslash(\).
x = 'Can\'t get by without a backslash'
x
## "Can't get by without a backslash"
print(x)
## Can't get by without a backslash
x = "Can't get by without a backslash"
x
## "Can't get by without a backslash"
print(x)
## Can't get by without a backslash
x = "Backslash your \"character\" !"
x
## 'Backslash your "character" !'
print(x)
## Backslash your "character" !
x = 'You can leave the " alone'
x
## 'You can leave the " alone'
print(x)
## You can leave the " alone
Python提供『三引號』,可建立『跨行字串』,且字串中可包含『單引號'』與『雙引號"』。
x = """Starting and ending a string with triple " characters
permits embedded newlines, and the use of " and ' without
backslashes"""
x
## 'Starting and ending a string with triple " characters\npermits embedded newlines, and the use of " and \' without\nbackslashes'
print(x)
## Starting and ending a string with triple " characters
## permits embedded newlines, and the use of " and ' without
## backslashes
# name = input("Name? ") # 從使用者鍵入值取得資料
# type(name)
# print(name)
l0 = [] # 空list
l1 = [True, 1, 1.0, '1', ['a', 'b', 'c']]
l1
## [True, 1, 1.0, '1', ['a', 'b', 'c']]
type(l1)
## <class 'list'>
len(l1)
## 5
list('NCCU')
## ['N', 'C', 'C', 'U']
list(range(-4, 4))
## [-4, -3, -2, -1, 0, 1, 2, 3]
l2 = [1, 2, [3, 4]]
l3 = [5, 6, [7, 8]]
l2 + l3 # list的拼接
## [1, 2, [3, 4], 5, 6, [7, 8]]
l2 * 3 # list的重複
## [1, 2, [3, 4], 1, 2, [3, 4], 1, 2, [3, 4]]
matrix = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
matrix
## [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
matrix[1] # 取出index = 1的元素(Python:從0開始數)
## [4, 5, 6]
matrix[1][1]
## 5
L = ['NCCU', 'MoneyBanking', 'QF']
L[1] = 'MONEY_BANKING'
L
## ['NCCU', 'MONEY_BANKING', 'QF']
L[0:2] = ['School', 'Department']
L
## ['School', 'Department', 'QF']
L = [1, 2, 3]
L
## [1, 2, 3]
L[1:2] = [8, 9] # 長度可變:insertion
L
## [1, 8, 9, 3]
L[1:1] = [6, 7] # 長度可變:insertion, replace nothing
L
## [1, 6, 7, 8, 9, 3]
L[1:3] = []
L
## [1, 8, 9, 3]
# R code:
# L <- list(1, 2, 3)
# L[1] <- list(8, 9)
# 被替換的項目不是替換值長度的倍數
L = [1]
L[:0] = [2, 3, 4] # insert all at 0
L
## [2, 3, 4, 1]
L[len(L):] = [5, 6, 7] # insert all at len(L)
L
## [2, 3, 4, 1, 5, 6, 7]
L.extend([8, 9, 10]) # insert all at end, by method
L
## [2, 3, 4, 1, 5, 6, 7, 8, 9, 10]
L = ['eat', 'more', 'SPAM']
L.append('please')
L
## ['eat', 'more', 'SPAM', 'please']
L.sort()
L
## ['SPAM', 'eat', 'more', 'please']
L.append(['Chen']) # 請與L.extend() method 比較
L
# L.sort()
# TypeError: '<' not supported between instances of 'list' and 'str'
#
# Detailed traceback:
# File "<string>", line 1, in <module>
## ['SPAM', 'eat', 'more', 'please', ['Chen']]
L = ['abc', 'ABD', 'aBe']
L.sort()
L
## ['ABD', 'aBe', 'abc']
L = ['abc', 'ABD', 'aBe']
L.sort(key = str.lower)
L
## ['abc', 'ABD', 'aBe']
L = ['abc', 'ABD', 'aBe']
L.sort(key = str.lower, reverse = True)
L
## ['aBe', 'ABD', 'abc']
L = ['spam', 'eggs', 'ham']
L.index('eggs')
# L.index('egg')
# ValueError: 'egg' is not in list
#
# Detailed traceback:
# File "<string>", line 1, in <module>
## 1
L.insert(1, 'toast')
L
## ['spam', 'toast', 'eggs', 'ham']
L.remove('eggs')
L
## ['spam', 'toast', 'ham']
L.pop(1) # delete by position
## 'toast'
L
## ['spam', 'ham']
['1', '2', '1', '1', '3'].count('1') # number of occurrences
## 3
L = ['spam', 'eggs', 'ham', 'toast']
del L[0]
L
## ['eggs', 'ham', 'toast']
del L[1:3]
L
## ['eggs']
import random
t1 = (1, 2, 2, random.gauss(10, 2))
t1
## (1, 2, 2, 7.277264609390826)
type(t1)
## <class 'tuple'>
t2 = 2, 4, 8, 1
t2
## (2, 4, 8, 1)
type(t2)
## <class 'tuple'>
max(t2)
## 8
t3 = 3,
type(t3)
## <class 'tuple'>
3 in [3, 4, 7, 9, 1]
## True
one, two, three, four = 1, 2, 3, 4 # 自動打包(packing)後自動解包(unpacking),同時指定4個變數值
one
## 1
two
## 2
v1, v2, v3 = [1, 2, 3]
v1
## 1
w1, w2, w3 = 'abc'
w2
# q1, q2 = 'ABC' # 多重指定變數值時,兩邊數量要一樣多
# ValueError: too many values to unpack (expected 2)
#
# Detailed traceback:
# File "<string>", line 1, in <module>
## 'b'
a, b, *c = (1, 2, 3, 4)
a
## 1
b
## 2
c
## [3, 4]
a, *b, c = (1, 2, 3, 4)
a
## 1
b
## [2, 3]
c
## 4
*a, b, c = (1, 2, 3, 4)
a
## [1, 2]
b
## 3
c
## 4
a,b,c
## ([1, 2], 3, 4)
a, b, c, d, *e = (1, 2, 3, 4)
a
## 1
b
## 2
c
## 3
e
## []
x = [1, 2, 3, 4, 5]
a, b, *_ = x
a
## 1
b
## 2
_
## [3, 4, 5]
可使用list()函數將任何序列型資料轉為list。而tuple()函數則轉為tuple。
list((1, 2, 3, 4))
## [1, 2, 3, 4]
tuple([1, 2, 3, 4])
## (1, 2, 3, 4)
list('NCCU')
## ['N', 'C', 'C', 'U']
tuple('Money and Banking')
## ('M', 'o', 'n', 'e', 'y', ' ', 'a', 'n', 'd', ' ', 'B', 'a', 'n', 'k', 'i', 'n', 'g')
set代表無順序的資料所構成。反之,list與tuple之元素則有順序意義。而set中重複的資料會被自動刪除不計。值得注意的是,set中的元素須為『不可變的資料』,故整數、浮點數、字串、與tuple可作為set的元素。而list、dict與set本身則不行。
l1 = l2 = [1, 2, 3] # both names are bound to the same list object (lists are mutable)
l1
## [1, 2, 3]
l2
## [1, 2, 3]
l1[0] = 100 # mutates that shared list in place
l1
## [100, 2, 3]
l2 # l2 shows the change because it refers to the same list object as l1
## [100, 2, 3]
n1 = n2 = 1 # both names are bound to the same int (ints are immutable)
n1
## 1
n2
## 1
n1 = 100 # rebinds n1 to a different int; the object n2 refers to is untouched
n1
## 100
n2 # n2 is unchanged: an int cannot be modified in place, and only n1 was rebound
## 1
x = {1, 2, 1, 3, 3, 1, 2, 4}
x
## {1, 2, 3, 4}
type(x)
## <class 'set'>
x = set([1, 2, 1, 3, 3, 1, 2, 4])
x
## {1, 2, 3, 4}
type(x)
## <class 'set'>
x.add(6)
x
## {1, 2, 3, 4, 6}
x.remove(2)
x
## {1, 3, 4, 6}
3 in x
## True
5 in x
## False
x = set([1, 2, 3, 1, 3, 5])
z = frozenset(x)
type(x)
## <class 'set'>
type(z)
# z.add(6)
# AttributeError: 'frozenset' object has no attribute 'add'
#
# Detailed traceback:
# File "<string>", line 1, in <module>
## <class 'frozenset'>
x.add(z)
x
## {1, 2, 3, 5, frozenset({1, 2, 3, 5})}
ages = {'Mary':13, 'John': 14, 'Tony':13}
type(ages)
## <class 'dict'>
ages
## {'Mary': 13, 'John': 14, 'Tony': 13}
'Mary' in ages
## True
x = {}
x
## {}
type(x)
## <class 'dict'>
x[0] = 'NCCU' # 此0是當作key,並非當作索引用
x[1] = 'Money and Banking'
x
## {0: 'NCCU', 1: 'Money and Banking'}
x[1]
## 'Money and Banking'
len(x)
## 2
# empty list
y = []
type(y)
# y[0] = 'NCCU' # 指定一個不存在的索引值0,值得注意的是:R允許這個操作
# IndexError: list assignment index out of range
#
# Detailed traceback:
# File "<string>", line 1, in <module>
## <class 'list'>
l <- list()
class(l)
## [1] "list"
l[1] <- "NCCU"
l
## [[1]]
## [1] "NCCU"
import math
import cmath
1 + 1j
## (1+1j)
1 - 2J # 大寫J也可以
# NOT RUN
# math.sqrt(-1)
## (1-2j)
cmath.sqrt(-1 + 0j)
## 1j
cmath.sqrt(-1)
## 1j
None 為特殊的基本型態,代表『不存在』或是『空值』的意義(類似R中的 NULL )。
None在Python亦代表 佔位符號(place holder),用來表示資料中某一個欄位目前尚未得知具體之值,先保留該位置,之後再填值(類似R中 NA 的功能)。
type(None)
## <class 'NoneType'>
None == False
## False
None == 0
## False
None == None # None只會等於自己
## True
False == 0
# 與R比較 (Not Run):
# len(None)
# TypeError: object of type 'NoneType' has no len()
## True
float('Inf')
## inf
float('inf')
## inf
float('INF')
## inf
import math
math.inf
## inf
math.isinf(float("-inf")) # OUTPUT:True. Return True if x is a positive or negative infinity, and False otherwise.
## True
math.isinf(float("inf")) # OUTPUT:True
# by comparing to infinity
## True
float("inf") == float("inf") # OUTPUT:True
## True
float("-inf") == float("-inf") # OUTPUT:True
## True
float("inf") == float("-inf")
## False
『保留字(Reserved words)』 不可做為變數名稱
R:
if, else, repeat, while, function,
for, in, next, break,TRUE, FALSE,
NULL, Inf, NaN, NA, NA_integer_,
NA_real_, NA_complex_, NA_character_
Python:
and, def, False, import, not,
True, as, del, finally, in, or,
try, assert, elif, for, is, pass,
while, break, else, from, lambda, async,
with, class, except, global, None, raise,
yield, continue, await, if, nonlocal, return
(註:print 與 exec 僅在 Python 2 為保留字;Python 3 中兩者已改為內建函數,並新增 async 與 await 為保留字。)
# install.packages("xts")
library(xts)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
search()
## [1] ".GlobalEnv" "package:xts" "package:zoo"
## [4] "package:stats" "package:graphics" "package:grDevices"
## [7] "package:utils" "package:datasets" "package:methods"
## [10] "Autoloads" "package:base"
stats::rnorm(n = 5)
## [1] -0.07624891 0.15594520 -0.09452932 0.95981448 -0.63450988
善用循環補齊、即 元素對元素運算(向量化運算) ,執行速度較快。
1 == 1
## [1] TRUE
1 == 2
## [1] FALSE
v1 <- 1:5
v2 <- 1:5
v3 <- 1:10
v4 <- 1:7
v1 + v4 # 會出現警告,因為長度並非互為倍數
## Warning in v1 + v4: 較長的物件長度並非較短物件長度的倍數
## [1] 2 4 6 8 10 7 9
v1 + 100 # 後面會進行循環補齊,形成具五個100的向量,並做元素對元素相加
## [1] 101 102 103 104 105
v1 * 4 # 後面會進行循環補齊,形成具五個4的向量,並做元素對元素相乘
## [1] 4 8 12 16 20
v1 >= 3 # 循環補齊做循環比較
## [1] FALSE FALSE TRUE TRUE TRUE
x <- c(1, 2, 3, NA, 5, NA, 8)
x == NA # 會得出NA,要小心
## [1] NA NA NA NA NA NA NA
2 == NA # 會得出NA,要小心
## [1] NA
# is.___ 為「疑問句」
# as.___ 為「當作」
is.na(x)
## [1] FALSE FALSE FALSE TRUE FALSE TRUE FALSE
set.seed(seed = 100) # 設定亂數種子
d <- rnorm(n = 100)
# 求出d > 1.96的數字個數
d > 1.96 # 可看出哪些值滿足,得出邏輯向量
## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE
## [25] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [37] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [49] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [61] FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [73] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [85] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE
## [97] FALSE FALSE FALSE FALSE
sum(d > 1.96) # 求個數,藉由「強制型別轉換」,TRUE為數值1,FALSE為數值0
## [1] 3
mean(d > 1.96) #求機率
## [1] 0.03