本週作業與進度

下週進度



R functions

Python functions


R

  1. 這部分來探討R最基礎的資料結構- Atomic Vector ,再討論以原子向量為延伸,加入 dim 屬性的 Array 。接著討論屬於異質資料的 List 與 Data Frame。

  2. 原子向量(atomic vector)又稱為 「同質性向量(homogeneous)」

v <- c(TRUE, FALSE)
class(v)
## [1] "logical"
v
## [1]  TRUE FALSE
v <- c(TRUE, FALSE, 100L)
class(v)
## [1] "integer"
v
## [1]   1   0 100
v <- c(TRUE, FALSE, 100L, 100)
class(v)
## [1] "numeric"
v
## [1]   1   0 100 100
v <- c(TRUE, FALSE, 100L, 100, 100 + 0i)
class(v)
## [1] "complex"
v
## [1]   1+0i   0+0i 100+0i 100+0i 100+0i
v <- c(TRUE, FALSE, 100L, 100, "100")
class(v)
## [1] "character"
v
## [1] "TRUE"  "FALSE" "100"   "100"   "100"
  1. R沒有 純量(scalar),純量只不過是長度為1的向量
?vector # 查詢vector函數
vector() # 空向量(empty vector)
## logical(0)
vector(mode = "logical", length = 5)  # 各個不同類別型態的向量初始化
## [1] FALSE FALSE FALSE FALSE FALSE
vector(mode = "numeric", length = 5)  # 注意向量元素的內容
## [1] 0 0 0 0 0
vector(mode = "integer", length = 5)
## [1] 0 0 0 0 0
vector(mode = "character", length = 5)
## [1] "" "" "" "" ""
vector(mode = "complex", length = 5)
## [1] 0+0i 0+0i 0+0i 0+0i 0+0i
  1. R裡面所有的操作背後都有一個函數
  1. 不合法的物件命名方式:數字開頭
# NOT RUN
# 1x <- 100
# 錯誤: unexpected symbol in "1x"
  1. 『_』開頭
# _x <- 100
# 錯誤: unexpected input in "_"
  1. 含空白字元
# x y <- 100
# 錯誤: unexpected symbol in "x y"
  1. 為保留字(reserved words)。可用?reserved查詢所有R的保留字。

另外,如用dot開頭(ex:.xx)命名時,變數會被創建,但物件名不會出現在Global Environment中。此時將 ls() 的參數 all.names 設定為 TRUE 即可看見。

.x <- 100
ls() # 看不到.x
## [1] "v"
ls(all.names = TRUE) # 可看見.x
## [1] ".x" "v"

如要打破上述命名規則,可使用成對『``』將名字放在其中:

`1x` <- 100
`1x`
## [1] 100
`_x` <- 100
`_x`
## [1] 100
`:)` <- 100
`:)`
## [1] 100
`x y` <- 100
`x y`
## [1] 100

常見的R指令(如二元運算子),背後也都有一個函數作支援:

10 > 2
## [1] TRUE
`>`(10, 2) # 指令與上行相同
## [1] TRUE
x <- 100
x
## [1] 100
`<-`(x, 100) # 指令與上行相同
x
## [1] 100
  1. R的 向量 沒有 維度(dimension) 這個屬性
dim(x = 1:10)  ## NULL: 代表『不存在』意義的一個R物件
## NULL
dim(x = vector()) # 即使空向量也是
## NULL
  1. R裡面所有的東西都是 物件,包含函數。
class(x = rnorm)   # function物件
## [1] "function"

Atomic Vector

numeric

100 # 數字100
## [1] 100
class(x = 100) # 查詢物件的類別型態
## [1] "numeric"
rnorm(n = 10, mean = 5, sd = 2) # 產生常態分配的亂數
##  [1] 9.4875071 0.8865933 7.5264319 5.8365288 3.8103149 7.2210596 6.3525279
##  [8] 5.8351004 7.2734849 4.3776636
1e-3
## [1] 0.001
1E-3 # 大寫E與小寫e皆可
## [1] 0.001
class(x = 2.78e-3)
## [1] "numeric"

integer

x <- 1:10
length(x = x)
## [1] 10
dim(x = x)
## NULL
class(x = x)
## [1] "integer"
1L
## [1] 1
class(x = 1L)
## [1] "integer"
1:10; 10:1  # 加;號,簡短程式可寫在同一行
##  [1]  1  2  3  4  5  6  7  8  9 10
##  [1] 10  9  8  7  6  5  4  3  2  1

logical

TRUE
## [1] TRUE
FALSE
## [1] FALSE
c(T, T, F, F) # 縮寫
## [1]  TRUE  TRUE FALSE FALSE

character (character string)

"A"
## [1] "A"
'A'
## [1] "A"
c("A", "B", "C")
## [1] "A" "B" "C"
"" # 空字串
## [1] ""
# 常用的字元字串向量
letters
##  [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o" "p" "q" "r" "s"
## [20] "t" "u" "v" "w" "x" "y" "z"
LETTERS
##  [1] "A" "B" "C" "D" "E" "F" "G" "H" "I" "J" "K" "L" "M" "N" "O" "P" "Q" "R" "S"
## [20] "T" "U" "V" "W" "X" "Y" "Z"
month.abb
##  [1] "Jan" "Feb" "Mar" "Apr" "May" "Jun" "Jul" "Aug" "Sep" "Oct" "Nov" "Dec"
month.name
##  [1] "January"   "February"  "March"     "April"     "May"       "June"     
##  [7] "July"      "August"    "September" "October"   "November"  "December"

complex

clx1 <- 1 + 1i
class(clx1)
## [1] "complex"
clx2 <- 1 + 0i
class(clx2)
## [1] "complex"
# 1 + 1*i
# not run

\(\sqrt{-1}\) :

sqrt(-1 + 0i)  # 可得虛數i
## [1] 0+1i

NA

not available 的縮寫,代表『遺失值』意義。

class(NA)
## [1] "logical"
class(NA_character_); class(NA_complex_); class(NA_integer_); class(NA_complex_)
## [1] "character"
## [1] "complex"
## [1] "integer"
## [1] "complex"
length(NA)
## [1] 1
x <- c(1, 2, NA, 4, NA)
x == NA
## [1] NA NA NA NA NA
# 判斷是否為NA,請用is.na()
is.na(x)
## [1] FALSE FALSE  TRUE FALSE  TRUE

NULL

代表『不存在』意義的特殊物件。

class(NULL)
## [1] "NULL"
length(NULL)
## [1] 0
NULL == NULL
## logical(0)
NULL == NA
## logical(0)
NULL == 0
## logical(0)
NULL == NULL
## logical(0)
is.null(NULL)
## [1] TRUE

Inf

class(Inf)
## [1] "numeric"
print(1/0)
## [1] Inf
print(log(0))
## [1] -Inf
1 + 1/0
## [1] Inf
Inf == Inf
## [1] TRUE
Inf == -Inf
## [1] FALSE
is.infinite(-Inf)
## [1] TRUE
is.infinite(Inf)
## [1] TRUE
is.finite(Inf)
## [1] FALSE

常見的屬性(attributes)與索引操作(subsetting)

names

x <- 1:5
x
## [1] 1 2 3 4 5
names(x)
## NULL
names(x) <- c("A", "B", "C", "D", "E")
x
## A B C D E 
## 1 2 3 4 5

dim

由之前的範例可知,R的向量不具有維度(dim)屬性

dim(x)
## NULL

值得注意的是,長度(length())並不算屬性

subsetting for [

positive vector
v <- c(1, 4, 5, 2, 8)
length(v)
## [1] 5
v[1]
## [1] 1
v[3]
## [1] 5
v[1:3]
## [1] 1 4 5
# v[1,3, 3]
v[c(1, 3, 5)]
## [1] 1 5 8
v[7]
## [1] NA
v[4:8]
## [1]  2  8 NA NA NA
character vector
v <- c(1, 4, 5, 2, 8)
names(v) <- letters[1:5]
v
## a b c d e 
## 1 4 5 2 8
v["a"]
## a 
## 1
v[c("a", "c", "e")]
## a c e 
## 1 5 8
negative vector
v <- 1:5
length(v)
## [1] 5
v[-1]
## [1] 2 3 4 5
v[-c(1, 3, 5)]
## [1] 2 4
v[-(1:3)]
## [1] 4 5
v[-8]   # 欲刪除之index不存在
## [1] 1 2 3 4 5
logical
v <- 1:5
v[c(T, T, F, F, T)]
## [1] 1 2 5
NULL
v <- 1:5
v[NULL]
## integer(0)
nothing
v <- 1:5
v[]
## [1] 1 2 3 4 5
zero
v <- 1:4
v[0]
## integer(0)
w <- letters[1:5]
w[0]
## character(0)
NA
v <- 1:5
v[NA]
## [1] NA NA NA NA NA
positive + negative
# v <- 1:4
# v[c(1, -3, 4)]
# Error in v[c(1, -3, 4)] : 只有負數下標中才能有 0
positive + NA
v <- 1:5
v[c(1, 2, NA, 4)]
## [1]  1  2 NA  4
negative + zero
v <- 1:5
v[c(-1, -2, 0, -4)]
## [1] 3 5
positive + zero
v <- 1:5
v[c(0, 1, 2)]
## [1] 1 2
v[c(1, 0, 2)]
## [1] 1 2
v[c(1, 2, 0)]
## [1] 1 2
negative + zero
v <- 1:5
v[c(0, -1, -2)]
## [1] 3 4 5
positive + NULL
v <- 1:5
v[NULL]
## integer(0)
v[c(NULL, 1, 3)]
## [1] 1 3
negative + NA
# v[c(-1, -2, NA)]
# Error in v[c(-1, -2, NA)] : 只有負數下標中才能有 0

案例補充 – 集合操作

set.seed(seed = 1000) # 設定亂數種子
d <- rnorm(n = 100, mean = 10, sd = 5)
head(d) # 觀察前6筆資料
## [1]  7.771109  3.970717 10.205632 13.196942  6.067228  8.072554
tail(d) # 觀察後6筆資料
## [1]  9.478942 12.339197 12.219604 14.142764  8.064749 20.094691
tail(d, n = 10)  # 可透過參數n設定,觀察更多筆資料
##  [1] 19.857662  0.395024 12.310630  9.196380  9.478942 12.339197 12.219604
##  [8] 14.142764  8.064749 20.094691
names(d)
## NULL
names(d) <- sample(x = c(letters, 1:9), size = 100, replace = TRUE) # 設定names屬性

透過R集合(set)相關的函數操作,可將向量視為集合,如集合的交集、聯集與差集等。

?setdiff  # 查詢相關集合函數
setequal(x = c(1, 1, 2, 3, 3, 3), y = c(1, 3, 2)) # 集合的比較
## [1] TRUE
x <- names(d)  # 抓出每個樣本點的名稱
x              # 注意:名稱會有重複的現象。
##   [1] "6" "t" "w" "1" "w" "b" "p" "r" "e" "7" "p" "f" "2" "h" "s" "x" "5" "g"
##  [19] "y" "3" "9" "t" "s" "r" "e" "f" "u" "2" "t" "d" "9" "e" "9" "w" "a" "a"
##  [37] "m" "v" "2" "u" "i" "8" "h" "a" "a" "b" "a" "7" "9" "u" "g" "z" "8" "z"
##  [55] "s" "1" "o" "h" "s" "o" "v" "h" "v" "f" "z" "x" "7" "n" "c" "d" "7" "7"
##  [73] "e" "5" "p" "y" "y" "v" "d" "o" "r" "r" "p" "z" "c" "e" "r" "g" "m" "6"
##  [91] "i" "c" "f" "6" "q" "t" "2" "o" "l" "n"
setdiff(x = x, y = as.character(1:9)) # 以集合的方式扣除掉數字名後得出所有英文字母的名稱,不會有重複
##  [1] "t" "w" "b" "p" "r" "e" "f" "h" "s" "x" "g" "y" "u" "d" "a" "m" "v" "i" "z"
## [20] "o" "n" "c" "q" "l"
d[setdiff(x = x, y = as.character(1:9))]
##          t          w          b          p          r          e          f 
##  3.9707172 10.2056316  8.0725535  7.6206606 13.5987535  9.9074719  7.2275565 
##          h          s          x          g          y          u          d 
##  9.3956384  3.3197948 10.8502874 10.1246593 -0.2329271  1.0807793 16.1046783 
##          a          m          v          i          z          o          n 
##  1.1690069  8.1690966 15.2880059  7.4134678  4.5265315  1.0483853 18.0960436 
##          c          q          l 
##  5.2153616  9.4789423  8.0647494

注意:以上做法會有問題。因名稱會有重複的現象,當你用character vector subsetting的方式按名稱取值時,只會抓出具相同名稱的樣本點中的第一個值,因此上述結果中每個名稱只對應到一個數值。建議:雖然R允許使用重複的名稱,但我們不建議這樣做。

nms <- setdiff(x = x, y = as.character(1:9)) # 這是我們要抓的名單
# 用is.element()一個個去檢查每一個樣本點是否在我們的名單(set)中(採logical vector subsetting)
d[is.element(el = names(d), set = nms)]     
##          t          w          w          b          p          r          e 
##  3.9707172 10.2056316  6.0672282  8.0725535  7.6206606 13.5987535  9.9074719 
##          p          f          h          s          x          g          y 
##  5.0878609  7.2275565  9.3956384  3.3197948 10.8502874 10.1246593 -0.2329271 
##          t          s          r          e          f          u          t 
##  3.8649200 14.1712367 12.6628587  6.7658752 13.0158063  1.0807793 12.8048786 
##          d          e          w          a          a          m          v 
## 16.1046783 13.4971476  7.6742453  1.1690069 10.9464430  8.1690966 15.2880059 
##          u          i          h          a          a          b          a 
##  3.2582047  7.4134678 10.9273251  9.7815428  8.9204331 17.3188767 11.1483332 
##          u          g          z          z          s          o          h 
##  5.1590856 11.2585569  4.5265315  5.0184900 10.5028901  1.0483853 11.5585061 
##          s          o          v          h          v          f          z 
## 22.7699400  5.6958112 12.7196422  8.0383098 16.1772095 15.9804322  7.5212655 
##          x          n          c          d          e          p          y 
##  8.5282939 18.0960436  5.2153616 10.2061856 11.4272881  9.2032820  7.6954055 
##          y          v          d          o          r          r          p 
## 10.8421904 16.9774651 13.6421313 11.6754497 15.8463825 11.2398341  8.2092526 
##          z          c          e          r          g          m          i 
## 16.9174666 12.0603458  9.3849607  9.6688534 -1.6124544  4.7717175 19.8576619 
##          c          f          q          t          o          l          n 
##  0.3950240 12.3106304  9.4789423 12.3391970 14.1427640  8.0647494 20.0946908
d[is.element(el = names(d), set = c("A", "B", "100"))]    # 會取出空向量
## named numeric(0)
d[is.element(el = names(d), set = c(nms, "1", "2", "3"))] # 名單添加1號, 2號, 3號   
##          t          w          1          w          b          p          r 
##  3.9707172 10.2056316 13.1969420  6.0672282  8.0725535  7.6206606 13.5987535 
##          e          p          f          2          h          s          x 
##  9.9074719  5.0878609  7.2275565 10.6069059  9.3956384  3.3197948 10.8502874 
##          g          y          3          t          s          r          e 
## 10.1246593 -0.2329271 11.0657705  3.8649200 14.1712367 12.6628587  6.7658752 
##          f          u          2          t          d          e          w 
## 13.0158063  1.0807793 11.6747108 12.8048786 16.1046783 13.4971476  7.6742453 
##          a          a          m          v          2          u          i 
##  1.1690069 10.9464430  8.1690966 15.2880059  6.2918927  3.2582047  7.4134678 
##          h          a          a          b          a          u          g 
## 10.9273251  9.7815428  8.9204331 17.3188767 11.1483332  5.1590856 11.2585569 
##          z          z          s          1          o          h          s 
##  4.5265315  5.0184900 10.5028901 14.7684014  1.0483853 11.5585061 22.7699400 
##          o          v          h          v          f          z          x 
##  5.6958112 12.7196422  8.0383098 16.1772095 15.9804322  7.5212655  8.5282939 
##          n          c          d          e          p          y          y 
## 18.0960436  5.2153616 10.2061856 11.4272881  9.2032820  7.6954055 10.8421904 
##          v          d          o          r          r          p          z 
## 16.9774651 13.6421313 11.6754497 15.8463825 11.2398341  8.2092526 16.9174666 
##          c          e          r          g          m          i          c 
## 12.0603458  9.3849607  9.6688534 -1.6124544  4.7717175 19.8576619  0.3950240 
##          f          q          t          2          o          l          n 
## 12.3106304  9.4789423 12.3391970 12.2196041 14.1427640  8.0647494 20.0946908

Array

當Vector擁有維度(dimension)之後,其類別型態變為 『陣列(Array)』。當其維度為『1維』時,其為『一維陣列』。維度為『2維』時,其 特稱『矩陣(Matrix)』 ,其他情況,如『3維以上』亦皆為『陣列』。

因為陣列(含矩陣)其來源為同質性的Atomic Vector,故陣列(含矩陣)亦為 同質性 的資料型態。

v <- 1:30
dim(v)
## NULL
class(v)
## [1] "integer"
dim(v) <- 30
class(v) # 為『1維陣列』
## [1] "array"
v
##  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
## [26] 26 27 28 29 30
dim(v) <- c(5, 6)
class(v) # 為矩陣
## [1] "matrix"
v
##      [,1] [,2] [,3] [,4] [,5] [,6]
## [1,]    1    6   11   16   21   26
## [2,]    2    7   12   17   22   27
## [3,]    3    8   13   18   23   28
## [4,]    4    9   14   19   24   29
## [5,]    5   10   15   20   25   30
dim(v) <- c(5, 3, 2)
class(v) # 3維陣列
## [1] "array"
dim(v) <- NULL # 失去維度後,會變為原來同質性的integer vector
v 
##  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
## [26] 26 27 28 29 30

Matrix

v1 <- 1:20
dim(v1) <- c(5, 4)
v1
##      [,1] [,2] [,3] [,4]
## [1,]    1    6   11   16
## [2,]    2    7   12   17
## [3,]    3    8   13   18
## [4,]    4    9   14   19
## [5,]    5   10   15   20
v2 <- matrix(data = 1:20, nrow = 5, ncol = 4) # by column-order
v2 # v1與v2內容相同,但做法不同。
##      [,1] [,2] [,3] [,4]
## [1,]    1    6   11   16
## [2,]    2    7   12   17
## [3,]    3    8   13   18
## [4,]    4    9   14   19
## [5,]    5   10   15   20
v3 <- matrix(data = 1:20, nrow = 5, ncol = 4, byrow = TRUE) # NOT by column-order
v3
##      [,1] [,2] [,3] [,4]
## [1,]    1    2    3    4
## [2,]    5    6    7    8
## [3,]    9   10   11   12
## [4,]   13   14   15   16
## [5,]   17   18   19   20

值得注意的是,針對一個矩陣求取長度(length):

length(v3) # 回傳向量的長度
## [1] 20

由此可知,向量與矩陣的差別在於是否具有維度屬性。

List

『異質性(heterogeneous)向量』,亦不具『維度』。

list建立

# ?list
l1 <- list(TRUE, 1L, 1, "1", list(100, 200), mean, median, sd)
class(l1)
## [1] "list"
length(l1)
## [1] 8
dim(l1)
## NULL
print(l1)
## [[1]]
## [1] TRUE
## 
## [[2]]
## [1] 1
## 
## [[3]]
## [1] 1
## 
## [[4]]
## [1] "1"
## 
## [[5]]
## [[5]][[1]]
## [1] 100
## 
## [[5]][[2]]
## [1] 200
## 
## 
## [[6]]
## function (x, ...) 
## UseMethod("mean")
## <bytecode: 0x7fc9e1bce830>
## <environment: namespace:base>
## 
## [[7]]
## function (x, na.rm = FALSE, ...) 
## UseMethod("median")
## <bytecode: 0x7fc9de7914f8>
## <environment: namespace:stats>
## 
## [[8]]
## function (x, na.rm = FALSE) 
## sqrt(var(if (is.vector(x) || is.factor(x)) x else as.double(x), 
##     na.rm = na.rm))
## <bytecode: 0x7fc9e1a60d80>
## <environment: namespace:stats>

list subsetting

[

回傳 (子清單)sub-list

l <-  list(TRUE, 1L, 1, "1", list(100, 200), matrix(1:20, nrow = 5, ncol = 4), mean)
l[5]
## [[1]]
## [[1]][[1]]
## [1] 100
## 
## [[1]][[2]]
## [1] 200
class(l[6])
## [1] "list"

[[

回傳list中元素的內容物,內容物是什麼,提領出來就是什麼。

l <-  list(TRUE, 1L, 1, "1", list(100, 200), matrix(1:20, nrow = 5, ncol = 4), mean)
l[6]
## [[1]]
##      [,1] [,2] [,3] [,4]
## [1,]    1    6   11   16
## [2,]    2    7   12   17
## [3,]    3    8   13   18
## [4,]    4    9   14   19
## [5,]    5   10   15   20
class(l[[6]])
## [1] "matrix"
l[[7]]
## function (x, ...) 
## UseMethod("mean")
## <bytecode: 0x7fc9e1bce830>
## <environment: namespace:base>
class(l[[7]])
## [1] "function"
l[[7]](1:20)
## [1] 10.5

$

當list具備 『names』 屬性時即可使用$。功能類似[[,但不完全相同。且在互動模式下使用便利。

l <- list(A = 100, B = 200, E = 300)
l[["A"]]
## [1] 100
l$A
## [1] 100
l <- list(A = 100, B = 200, E = 300)
x <- "A"
l[[x]]
## [1] 100
l$x
## NULL

因為R會將程式 l$x 視為 l[["x"]](以字面上的名稱 x 查找,而非使用變數 x 的值 "A"),因 l 內並無元素命名為 x ,故回傳NULL。

Data Frame

# ?data.frame
df <- data.frame(A = 10:1, B = rnorm(n = 10), C = runif(n = 10))
df
##     A          B          C
## 1  10 -0.3615950 0.82076792
## 2   9 -1.1619680 0.20936290
## 3   8 -0.7114164 0.76743166
## 4   7  0.3489709 0.67099722
## 5   6  0.4273664 0.04625896
## 6   5  1.6608471 0.75360079
## 7   4  0.5816135 0.40778331
## 8   3  0.1434655 0.37252915
## 9   2  0.3433031 0.25478193
## 10  1 -0.9314912 0.39000806
class(df)
## [1] "data.frame"
typeof(df) # data frame的內部型態本質為list
## [1] "list"
dim(df)    # 與矩陣一樣具有二維維度
## [1] 10  3
  • 因此,data frame的特性有些來自matrix,有些則來自list。

from Matrix

dim(df)
## [1] 10  3
dimnames(df)
## [[1]]
##  [1] "1"  "2"  "3"  "4"  "5"  "6"  "7"  "8"  "9"  "10"
## 
## [[2]]
## [1] "A" "B" "C"
colnames(df)
## [1] "A" "B" "C"
rownames(df)
##  [1] "1"  "2"  "3"  "4"  "5"  "6"  "7"  "8"  "9"  "10"
df[, "A"]
##  [1] 10  9  8  7  6  5  4  3  2  1
df[2, ]
##   A         B         C
## 2 9 -1.161968 0.2093629
df["2", ]
##   A         B         C
## 2 9 -1.161968 0.2093629
df[8, 1]
## [1] 3
df["8", 1]
## [1] 3
df["8", "A"]
## [1] 3
df[8, "A"]
## [1] 3

from List

length(df)
## [1] 3
names(df)
## [1] "A" "B" "C"
df$A
##  [1] 10  9  8  7  6  5  4  3  2  1
df$A[8]
## [1] 3
df[["A"]][8]
## [1] 3
df[[c(1, 8)]] # recursive indexing(遞迴索引)。分兩層提取資料,第一層先抓第1個元素,進去第二層資料後提領第8個。
## [1] 3

Python

Basic Type

float

1e-3
## 0.001
1E-3 # 大寫E也可以
## 0.001
type(1e3)
## <class 'float'>
import math
type(math.e ** 2 )
## <class 'float'>
type(100)
## <class 'int'>
type(100.)
## <class 'float'>
type(100.0)
## <class 'float'>

int

x = 100
x
## 100
type(x)
## <class 'int'>

bool

  • 只有True或False
True
## True
type(False)
## <class 'bool'>
float(True)
## 1.0
float(False)
## 0.0
int(True)
## 1
int(False)
## 0
True is 1
## False
True == 1   # 因型別轉換
## True
  • Python的bool值類似C語言:0或是空值為False,其餘為True
  • 數字0、0.0、0 + 0j 皆為False,其餘數字為True
  • 空字串 '' 為False,其他字串為True
  • 空list []為False,其他list為True
  • 空tuple ()為False,其他tuple為True
  • 空dict {}為False,其他dict為True
  • None為False
0 == False
## True
0.0 == False
## True
0 + 0j == False
## True
[] == False
## False
() == False
## False
{} == False
## False
None == False
## False
not []
## True
not ()
## True
not {}
## True
not None
## True

str

  • 為字元序列(sequence)型別。
  • 為不可變(immutable)的。
# help(str)
'This is a string'
## 'This is a string'
"This is a string"
## 'This is a string'
'''This is a string'''
## 'This is a string'
"""This is a string"""
## 'This is a string'

反斜線(\)可用於『脫逸字元(escape character)』,可賦予特殊意義。

  • \n:代表換行
  • \t:代表Tab
  • \\:代表正常的反斜線\
  • \":代表雙引號字元 "
x = "\tThis string starts with a \"tab\"." 
x
## '\tThis string starts with a "tab".'
print(x)
##  This string starts with a "tab".
x = "This string contains a single backslash(\\)."
x
## 'This string contains a single backslash(\\).'
print(x)
## This string contains a single backslash(\).
x = 'Can\'t get by without a backslash'
x
## "Can't get by without a backslash"
print(x)
## Can't get by without a backslash
x = "Can't get by without a backslash"
x
## "Can't get by without a backslash"
print(x)
## Can't get by without a backslash
x = "Backslash your \"character\" !"
x
## 'Backslash your "character" !'
print(x)
## Backslash your "character" !
x = 'You can leave the " alone'
x
## 'You can leave the " alone'
print(x)
## You can leave the " alone

Python提供『三引號』,可建立『跨行字串』,且字串中可包含『單引號'』與『雙引號"』。

x = """Starting and ending a string with triple " characters
permits embedded newlines, and the use of " and ' without
backslashes"""
x
## 'Starting and ending a string with triple " characters\npermits embedded newlines, and the use of " and \' without\nbackslashes'
print(x)
## Starting and ending a string with triple " characters
## permits embedded newlines, and the use of " and ' without
## backslashes
# name = input("Name? ") # 從使用者鍵入值取得資料
# type(name)
# print(name)

list

  • Python中的list是由有順序的元素構成(內建資料型態)。
  • 與R的list相同,皆可包含不同型別(異質)的元素在裡頭。
  • 為可變(mutable)的資料型態,支持『原地修改(modify in place)』
  • list長度可變,可嵌套

list基本操作

l0 = [] # 空list
l1 = [True, 1, 1.0, '1', ['a', 'b', 'c']]
l1
## [True, 1, 1.0, '1', ['a', 'b', 'c']]
type(l1)
## <class 'list'>
len(l1)
## 5
list('NCCU')
## ['N', 'C', 'C', 'U']
list(range(-4, 4))
## [-4, -3, -2, -1, 0, 1, 2, 3]
l2 = [1, 2, [3, 4]]
l3 = [5, 6, [7, 8]]
l2 + l3                                        # list的拼接
## [1, 2, [3, 4], 5, 6, [7, 8]]
l2 * 3                                         # list的重複
## [1, 2, [3, 4], 1, 2, [3, 4], 1, 2, [3, 4]]
matrix = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
matrix
## [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
matrix[1]           # 取出index = 1的元素(Python:從0開始數)
## [4, 5, 6]
matrix[1][1]
## 5

list slicing

L = ['NCCU', 'MoneyBanking', 'QF']
L[1] = 'MONEY_BANKING'
L
## ['NCCU', 'MONEY_BANKING', 'QF']
L[0:2] = ['School', 'Department']
L
## ['School', 'Department', 'QF']
L = [1, 2, 3]
L
## [1, 2, 3]
L[1:2] = [8, 9]  # 長度可變:insertion
L
## [1, 8, 9, 3]
L[1:1] = [6, 7]  # 長度可變:insertion, replace nothing
L
## [1, 6, 7, 8, 9, 3]
L[1:3] = []
L
## [1, 8, 9, 3]
# R code:
# L <- list(1, 2, 3)   
# L[1] <- list(8, 9)  
# 被替換的項目不是替換值長度的倍數
L = [1]
L[:0] = [2, 3, 4]            # insert all at 0 
L
## [2, 3, 4, 1]
L[len(L):] = [5, 6, 7]       # insert all at len(L)
L
## [2, 3, 4, 1, 5, 6, 7]
L.extend([8, 9, 10])         # insert all at end, by method
L
## [2, 3, 4, 1, 5, 6, 7, 8, 9, 10]

list更多的操作

L = ['eat', 'more', 'SPAM']
L.append('please')
L
## ['eat', 'more', 'SPAM', 'please']
L.sort()
L
## ['SPAM', 'eat', 'more', 'please']
L.append(['Chen'])           # 請與L.extend() method 比較
L
# L.sort()
# TypeError: '<' not supported between instances of 'list' and 'str'
# 
# Detailed traceback: 
#   File "<string>", line 1, in <module>
## ['SPAM', 'eat', 'more', 'please', ['Chen']]
L = ['abc', 'ABD', 'aBe']
L.sort()
L
## ['ABD', 'aBe', 'abc']
L = ['abc', 'ABD', 'aBe']
L.sort(key = str.lower)
L
## ['abc', 'ABD', 'aBe']
L = ['abc', 'ABD', 'aBe']
L.sort(key = str.lower, reverse = True)
L
## ['aBe', 'ABD', 'abc']
L = ['spam', 'eggs', 'ham']
L.index('eggs')

# L.index('egg')
# ValueError: 'egg' is not in list
# 
# Detailed traceback: 
#   File "<string>", line 1, in <module>
## 1
L.insert(1, 'toast')
L
## ['spam', 'toast', 'eggs', 'ham']
L.remove('eggs')
L
## ['spam', 'toast', 'ham']
L.pop(1)                              # delete by position
## 'toast'
L
## ['spam', 'ham']
['1', '2', '1', '1', '3'].count('1')  # number of occurrences
## 3
L = ['spam', 'eggs', 'ham', 'toast']
del L[0]
L
## ['eggs', 'ham', 'toast']
del L[1:3]
L
## ['eggs']

tuple

  • tuple與list類似,但是tuple只能被建立而不能被修改
  • str、list與tuple在Python中皆視為有順序的『序列』
import random
t1 = (1, 2, 2, random.gauss(10, 2))
t1
## (1, 2, 2, 7.277264609390826)
type(t1)
## <class 'tuple'>
t2 = 2, 4, 8, 1
t2
## (2, 4, 8, 1)
type(t2)
## <class 'tuple'>
max(t2)
## 8
t3 = 3,
type(t3)
## <class 'tuple'>
3 in [3, 4, 7, 9, 1]
## True
one, two, three, four = 1, 2, 3, 4  # 自動打包(packing)後自動解包(unpacking),同時指定4個變數值
one
## 1
two
## 2
  • 自動打包與自動解包不只適用於tuple,只要是『序列型別』都適用。
v1, v2, v3 = [1, 2, 3]
v1
## 1
w1, w2, w3 = 'abc'
w2

# q1, q2 = 'ABC' # 多重指定變數值時,兩邊數量要一樣多
# ValueError: too many values to unpack (expected 2)
# 
# Detailed traceback: 
#   File "<string>", line 1, in <module>
## 'b'
  • *號版本自動解包 –> 產生list
a, b, *c = (1, 2, 3, 4)
a
## 1
b
## 2
c
## [3, 4]
a, *b, c = (1, 2, 3, 4)
a
## 1
b
## [2, 3]
c
## 4
*a, b, c = (1, 2, 3, 4)
a
## [1, 2]
b
## 3
c
## 4
a,b,c
## ([1, 2], 3, 4)
a, b, c, d, *e = (1, 2, 3, 4)
a
## 1
b
## 2
c
## 3
e
## []
x = [1, 2, 3, 4, 5]
a, b, *_ = x
a
## 1
b
## 2
_
## [3, 4, 5]

可使用list()函數將任何序列型資料轉為list。而tuple()函數則轉為tuple。

list((1, 2, 3, 4))
## [1, 2, 3, 4]
tuple([1, 2, 3, 4])
## (1, 2, 3, 4)
list('NCCU')
## ['N', 'C', 'C', 'U']
tuple('Money and Banking')
## ('M', 'o', 'n', 'e', 'y', ' ', 'a', 'n', 'd', ' ', 'B', 'a', 'n', 'k', 'i', 'n', 'g')

set

set代表無順序的資料所構成。反之,list與tuple之元素則有順序意義。而set中重複的資料會被自動刪除不計。值得注意的是,set中的元素須為『不可變的資料』,故整數、浮點數、字串、與tuple可作為set的元素。而list、dict與set本身則不行。

l1 = l2 = [1, 2, 3] # list為可變資料
l1
## [1, 2, 3]
l2
## [1, 2, 3]
l1[0] = 100
l1
## [100, 2, 3]
l2                 # 因list為『可變資料』,所以l2隨l1變化而改變
## [100, 2, 3]
n1 = n2 = 1      # int為不可變資料
n1
## 1
n2
## 1
n1 = 100
n1
## 100
n2               # 因int為『不可變資料』,所以n2不隨n1變化而改變
## 1
x = {1, 2, 1, 3, 3, 1, 2, 4}
x
## {1, 2, 3, 4}
type(x)
## <class 'set'>
x = set([1, 2, 1, 3, 3, 1, 2, 4])
x
## {1, 2, 3, 4}
type(x)
## <class 'set'>
x.add(6)
x
## {1, 2, 3, 4, 6}
x.remove(2)
x
## {1, 3, 4, 6}
3 in x
## True
5 in x
## False
x = set([1, 2, 3, 1, 3, 5])
z = frozenset(x)
type(x)
## <class 'set'>
type(z)

# z.add(6)
# AttributeError: 'frozenset' object has no attribute 'add'
# 
# Detailed traceback: 
#   File "<string>", line 1, in <module>
## <class 'frozenset'>
x.add(z)
x
## {1, 2, 3, 5, frozenset({1, 2, 3, 5})}

dict

  • 鍵(key):值(value) 的對應方式建立的資料結構。鍵可為 『整數』『字串』、或其他Python物件。
  • 但字典內資料並非按照字面上的順序作為索引排序。
  • 字典可以作為紀錄、搜尋表及其他 key 比 value 更為重要之用途使用。而字典索引是一種非常有效率的搜索方式。

建立dict

ages = {'Mary':13, 'John': 14, 'Tony':13}
type(ages)
## <class 'dict'>
ages
## {'Mary': 13, 'John': 14, 'Tony': 13}
'Mary' in ages
## True
x = {}
x
## {}
type(x)
## <class 'dict'>
x[0] = 'NCCU'  # 此0是當作key,並非當作索引用
x[1] = 'Money and Banking'
x
## {0: 'NCCU', 1: 'Money and Banking'}
x[1]
## 'Money and Banking'
len(x)
## 2
# empty list
y = []
type(y)
# y[0] = 'NCCU'  # 指定一個不存在的索引值0,值得注意的是:R允許這個操作
# IndexError: list assignment index out of range
# 
# Detailed traceback: 
#   File "<string>", line 1, in <module>
## <class 'list'>
l <- list()
class(l)
## [1] "list"
l[1] <- "NCCU"
l
## [[1]]
## [1] "NCCU"

dict索引

complex

import math
import cmath
1 + 1j
## (1+1j)
1 - 2J # 大寫J也可以

# NOT RUN
# math.sqrt(-1) 
## (1-2j)
cmath.sqrt(-1 + 0j)
## 1j
cmath.sqrt(-1)
## 1j

None

特殊的基本型態,代表『不存在』或是『空值』的意義(類似R中的 NULL )。
None在Python亦代表 佔位符號(place holder),用來表示資料中某一個欄位目前尚未得知具體之值,先保留該位置,之後再填值(類似R中 NA 的功能)。

type(None)
## <class 'NoneType'>
None == False
## False
None == 0
## False
None == None # None只會等於自己
## True
False == 0  

# 與R比較 (Not Run):
# len(None)
# TypeError: object of type 'NoneType' has no len()
## True

Inf

float('Inf')
## inf
float('inf')
## inf
float('INF')
## inf
import math
math.inf
## inf
math.isinf(float("-inf")) # OUTPUT:True. Return True if x is a positive or negative infinity, and False otherwise.
## True
math.isinf(float("inf"))  # OUTPUT:True
# by comparing to infinity
## True
float("inf") == float("inf") # OUTPUT:True
## True
float("-inf") == float("-inf") # OUTPUT:True
## True
float("inf") == float("-inf")
## False

補充:Reserved words (keywords)

『保留字(Reserved words)』 不可做為變數名稱
R:
if, else, repeat, while, function,

for, in, next, break,TRUE, FALSE,

NULL, Inf, NaN, NA, NA_integer_,

NA_real_, NA_complex_, NA_character_

Python(註:print 與 exec 僅在 Python 2 為保留字;Python 3 則另有 async 與 await):
and, def, False, import, not,

True, as, del, finally, in, or,

try, assert, elif, for, is, pass,

while, break, else, from, lambda, print,

with, class, except, global, None, raise,

yield, continue, exec, if, nonlocal, return

補充:R套件安裝

# install.packages("xts")
library(xts)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
search()
##  [1] ".GlobalEnv"        "package:xts"       "package:zoo"      
##  [4] "package:stats"     "package:graphics"  "package:grDevices"
##  [7] "package:utils"     "package:datasets"  "package:methods"  
## [10] "Autoloads"         "package:base"
stats::rnorm(n = 5)
## [1] -0.07624891  0.15594520 -0.09452932  0.95981448 -0.63450988

補充:循環補齊(recycling)

善用循環補齊、即 元素對元素運算(向量化運算) ,執行速度較快。

1 == 1
## [1] TRUE
1 == 2
## [1] FALSE
v1 <- 1:5
v2 <- 1:5
v3 <- 1:10
v4 <- 1:7
v1 + v4 # 會出現警告,因為長度並非互為倍數
## Warning in v1 + v4: 較長的物件長度並非較短物件長度的倍數
## [1]  2  4  6  8 10  7  9
v1 + 100 # 後面會進行循環補齊,形成具五個100的向量,並做元素對元素相加
## [1] 101 102 103 104 105
v1 * 4   # 後面會進行循環補齊,形成具五個4的向量,並做元素對元素相乘
## [1]  4  8 12 16 20
v1 >= 3 # 循環補齊做循環比較
## [1] FALSE FALSE  TRUE  TRUE  TRUE
x <- c(1, 2, 3, NA, 5, NA, 8)
x == NA   # 會得出NA,要小心
## [1] NA NA NA NA NA NA NA
2 == NA   # 會得出NA,要小心
## [1] NA
# is.___ 為「疑問句」
# as.___ 為「當作」
is.na(x)
## [1] FALSE FALSE FALSE  TRUE FALSE  TRUE FALSE
set.seed(seed = 100) # 設定亂數種子
d <- rnorm(n = 100)
# 求出d > 1.96的數字個數
d > 1.96  # 可看出哪些值滿足,得出邏輯向量
##   [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE
##  [25] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [37] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [49] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [61] FALSE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [73] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [85] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE
##  [97] FALSE FALSE FALSE FALSE
sum(d > 1.96) # 求個數,藉由「強制型別轉換」,TRUE為數值1,FALSE為數值0
## [1] 3
mean(d > 1.96) #求機率
## [1] 0.03