Notes for r4pda Ch 1 ~ Ch 3. 데이터 타입

Save and Load r source

source("data_load.R")

NA & NULL

a <- NA
is.na(a)

## [1] TRUE

is.null(a) #변수가 초기화 되었는지 확인할 때

## [1] FALSE

c(TRUE, TRUE) & c(TRUE, TRUE) # 벡터 연산, 각 원소 간 계산

## [1] TRUE TRUE

c(TRUE, TRUE) && c(TRUE, TRUE) # 한 개의 boolean 값끼리의 연산 (주의: R 4.3.0부터는 길이가 2 이상인 피연산자를 주면 에러가 발생한다)

## [1] TRUE

Factor

sex <- factor("m", c("m", "f"))
sex

## [1] m
## Levels: m f

nlevels(sex)

## [1] 2

levels(sex)

## [1] "m" "f"

levels(sex)[1]

## [1] "m"

levels(sex)[2]

## [1] "f"

levels(sex) <- c("male", "female")
sex

## [1] male
## Levels: male female

ordered(c("good", "nomal", "bad"))

## [1] good  nomal bad  
## Levels: bad < good < nomal

factor(c("good","nomal","bad"), ordered=TRUE)

## [1] good  nomal bad  
## Levels: bad < good < nomal

Vector

벡터에는 한 가지 유형의 스칼라만 저장할 수 있다.

x <- c(1, 2, 3)
x <- 1:10 # [1] 1 2 3 4 5 6 7 8 9 10
x <- seq(1, 10, 2) # [1] 1 3 5 7 9
x <- rep(1:2, 5) # [1] 1 2 1 2 1 2 1 2 1 2
x <- rep(1:2, each=5) # [1] 1 1 1 1 1 2 2 2 2 2

seq_along(c("a", "b", "c")) # 주어진 데이터의 길이만큼 1,2,3,...,N으로 구성된 벡터를 반환

## [1] 1 2 3

seq_len(3) # N값이 인자로 주어지면 1,2,...,N으로 구성된 벡터를 반환

## [1] 1 2 3

x <- seq_len(5)
x

## [1] 1 2 3 4 5

x <- c(1, 3, 4, 5)
names(x) <- c("kim", "seo", "park", "choi")
x[1]

## kim 
##   1

x[-1] # kim을 제외한 요소를 보여준다

##  seo park choi 
##    3    4    5

x[c(1,3)]

##  kim park 
##    1    4

x[1:3]

##  kim  seo park 
##    1    3    4

x["seo"]

## seo 
##   3

x[c("park", "choi")]

## park choi 
##    4    5

names(x)[2]

## [1] "seo"

length(x)

## [1] 4

nrow(x) # nrow()는 행렬만 가능

## NULL

NROW(x) # NROW()는 벡터와 행렬 모두 가능

## [1] 4

Vector Operator

"a" %in% c("a", "b", "c")

## [1] TRUE

setdiff(c("a", "b", "c"), c("a", "d"))

## [1] "b" "c"

union(c("a", "b", "c"), c("a", "d"))

## [1] "a" "b" "c" "d"

intersect(c("a", "b", "c"), c("a", "d"))

## [1] "a"

setequal(c("a", "b", "c"), c("a", "d")) # 집합간 비교

## [1] FALSE

List

x <- list(name="foo", height=70)
x

## $name
## [1] "foo"
## 
## $height
## [1] 70

x <- list(name="foo", height=c(1, 3, 5))
x

## $name
## [1] "foo"
## 
## $height
## [1] 1 3 5

x$name

## [1] "foo"

x$height

## [1] 1 3 5

x[1] # '(name, foo)'를 갖고 있는 리스트 반환

## $name
## [1] "foo"

x[[1]] # 값을 반환

## [1] "foo"

x[2]

## $height
## [1] 1 3 5

x[[2]]

## [1] 1 3 5

Matrix

행렬에는 한 가지 유형의 스칼라만 저장할 수 있다.

matrix(c(1,2,3,4,5,6,7,8,9,10,11,12), nrow=3)

##      [,1] [,2] [,3] [,4]
## [1,]    1    4    7   10
## [2,]    2    5    8   11
## [3,]    3    6    9   12

matrix(c(1,2,3,4,5,6,7,8,9,10,11,12), ncol=3)

##      [,1] [,2] [,3]
## [1,]    1    5    9
## [2,]    2    6   10
## [3,]    3    7   11
## [4,]    4    8   12

matrix(c(1,2,3,4,5,6,7,8,9,10,11,12), nrow=3, byrow=TRUE)

##      [,1] [,2] [,3] [,4]
## [1,]    1    2    3    4
## [2,]    5    6    7    8
## [3,]    9   10   11   12

matrix(c(1,2,3,4,5,6), nrow=3, dimnames=list(c("item1","item2","item3"),c("A","B")))

##       A B
## item1 1 4
## item2 2 5
## item3 3 6

x <- matrix(c(1,2,3,4,5,6,7,8,9,10,11,12), nrow=3,dimnames=list(c("item1","item2","item3"),c("A","B","C","D")))
x

##       A B C  D
## item1 1 4 7 10
## item2 2 5 8 11
## item3 3 6 9 12

x[1, 2]

## [1] 4

x[1:2,]

##       A B C  D
## item1 1 4 7 10
## item2 2 5 8 11

x[-2,]

##       A B C  D
## item1 1 4 7 10
## item3 3 6 9 12

x[2,1:2]

## A B 
## 2 5

x[c(1,3),c(1,3)]

##       A C
## item1 1 7
## item3 3 9

x["item1",]

##  A  B  C  D 
##  1  4  7 10

x[c("item1","item3"),]

##       A B C  D
## item1 1 4 7 10
## item3 3 6 9 12

Matrix Operator

x <- matrix(c(1,2,3,4,5,6,7,8,9), nrow=3)
x

##      [,1] [,2] [,3]
## [1,]    1    4    7
## [2,]    2    5    8
## [3,]    3    6    9

x * 2

##      [,1] [,2] [,3]
## [1,]    2    8   14
## [2,]    4   10   16
## [3,]    6   12   18

x + x

##      [,1] [,2] [,3]
## [1,]    2    8   14
## [2,]    4   10   16
## [3,]    6   12   18

x %*% x

##      [,1] [,2] [,3]
## [1,]   30   66  102
## [2,]   36   81  126
## [3,]   42   96  150

t(x) # 전치행렬

##      [,1] [,2] [,3]
## [1,]    1    2    3
## [2,]    4    5    6
## [3,]    7    8    9

ncol(x)

## [1] 3

nrow(x)

## [1] 3

역행렬

x <- matrix(c(1,2,3,4), nrow=2)
x

##      [,1] [,2]
## [1,]    1    3
## [2,]    2    4

solve(x)

##      [,1] [,2]
## [1,]   -2  1.5
## [2,]    1 -0.5

Array

matrix(1:6, ncol = 3)

##      [,1] [,2] [,3]
## [1,]    1    3    5
## [2,]    2    4    6

array(1:6, dim = c(2, 3))

##      [,1] [,2] [,3]
## [1,]    1    3    5
## [2,]    2    4    6

x <- array(1:12, dim = c(2, 2, 3))
x[1, 2, 3]

## [1] 11

x[, , 3]

##      [,1] [,2]
## [1,]    9   11
## [2,]   10   12

dim(x)

## [1] 2 2 3

Data Frame

d <- data.frame(name = c("choi","park","lee","kim","moon"), age=c(49,32,16,35,55), zender=c("M","F","M","F","M"))
d

##   name age zender
## 1 choi  49      M
## 2 park  32      F
## 3  lee  16      M
## 4  kim  35      F
## 5 moon  55      M

d$ex <- c(3, 6, 9, 12, 15) # 새로운 데이터 추가
d

##   name age zender ex
## 1 choi  49      M  3
## 2 park  32      F  6
## 3  lee  16      M  9
## 4  kim  35      F 12
## 5 moon  55      M 15

d$x

## NULL

d[1,]

##   name age zender ex
## 1 choi  49      M  3

d[1, 2]

## [1] 49

d[c(1,3), 3]

## [1] M M
## Levels: F M

d[c(1,3), 3, drop=FALSE]

##   zender
## 1      M
## 3      M

d[-1,]

##   name age zender ex
## 2 park  32      F  6
## 3  lee  16      M  9
## 4  kim  35      F 12
## 5 moon  55      M 15

d[, -3]

##   name age ex
## 1 choi  49  3
## 2 park  32  6
## 3  lee  16  9
## 4  kim  35 12
## 5 moon  55 15

d[-5, -c(1, 3)]

##   age ex
## 1  49  3
## 2  32  6
## 3  16  9
## 4  35 12

d[, c("name", "ex")]

##   name ex
## 1 choi  3
## 2 park  6
## 3  lee  9
## 4  kim 12
## 5 moon 15

str(d)

## 'data.frame':    5 obs. of  4 variables:
##  $ name  : Factor w/ 5 levels "choi","kim","lee",..: 1 5 3 2 4
##  $ age   : num  49 32 16 35 55
##  $ zender: Factor w/ 2 levels "F","M": 2 1 2 1 2
##  $ ex    : num  3 6 9 12 15

head(d, 3)

##   name age zender ex
## 1 choi  49      M  3
## 2 park  32      F  6
## 3  lee  16      M  9

Data Frame : colnames(), rownames()

x <- data.frame(1:3)
x

##   X1.3
## 1    1
## 2    2
## 3    3

x <- data.frame(50, 35, 22)
x

##   X50 X35 X22
## 1  50  35  22

x <- data.frame(c(50, 35, 22))
x

##   c.50..35..22.
## 1            50
## 2            35
## 3            22

x <- data.frame(c(50, 35, 22, 16))
x

##   c.50..35..22..16.
## 1                50
## 2                35
## 3                22
## 4                16

colnames(x) <- c("age")
x

##   age
## 1  50
## 2  35
## 3  22
## 4  16

rownames(x) <- c("choi", "park", "lee", "kim")
x

##      age
## choi  50
## park  35
## lee   22
## kim   16

Data Frame : %in%

d

##   name age zender ex
## 1 choi  49      M  3
## 2 park  32      F  6
## 3  lee  16      M  9
## 4  kim  35      F 12
## 5 moon  55      M 15

d[, names(d) %in% c("name", "zender")]

##   name zender
## 1 choi      M
## 2 park      F
## 3  lee      M
## 4  kim      F
## 5 moon      M

d[, !names(d) %in% c("name")]

##   age zender ex
## 1  49      M  3
## 2  32      F  6
## 3  16      M  9
## 4  35      F 12
## 5  55      M 15

타입 판별

class(c(1, 2)) # [1] "numeric"
class(matrix(c(1, 2))) # [1] "matrix" (R 4.0.0 이상에서는 [1] "matrix" "array")
class(list(c(1, 2))) # [1] "list"
class(data.frame(x=c(1, 2))) # [1] "data.frame"
class(c("a", "b")) # [1] "character"

str(c(1, 2))

##  num [1:2] 1 2

str(matrix(c(1, 2)))

##  num [1:2, 1] 1 2

str(list(c(1, 2)))

## List of 1
##  $ : num [1:2] 1 2

str(data.frame(x=c(1, 2)))

## 'data.frame':    2 obs. of  1 variable:
##  $ x: num  1 2

str(c("a", "b"))

##  chr [1:2] "a" "b"

is.numeric(c(1, 2, 3)) # [1] TRUE
is.numeric(c("A", "B")) # [1] FALSE
is.matrix(matrix(c(1, 2))) # [1] TRUE

타입 변환 : as.factor(), as.numeric(), as.data.frame(), as.matrix()

x <- matrix(c(1:4))
class(x)

## [1] "matrix"

x <- data.frame(x)
class(x)

## [1] "data.frame"

x <- c("m","f")
class(x)

## [1] "character"

x <- as.factor(x)
class(x)

## [1] "factor"

x

## [1] m f
## Levels: f m

factor(x, levels = c("m", "f"))

## [1] m f
## Levels: m f

Notes for r4pda Ch 1 ~ Ch 3. 데이터 타입

2017 May 22/23/24/26

Save and Load r source

NA & NULL

Factor

Vector

Vector Operator

List

Matrix

Matrix Operator

Array

Data Frame

Data Frame : colnames(), rownames()

Data Frame : %in%

타입 판별

타입 변환 : as.factor(), as.numeric(), as.data.frame(), as.matrix()