Save and Load R source

source("data_load.R")

NA & NULL

# NA marks a missing value; NULL means "no object at all". They are tested
# with different predicates.
a <- NA
is.na(a)
## [1] TRUE
is.null(a) # used to check whether a variable has been initialised (NA is not NULL)
## [1] FALSE
c(TRUE, TRUE) & c(TRUE, TRUE) # `&` is vectorised: element-wise AND
## [1] TRUE TRUE
# `&&` combines single logical values only. Passing operands of length > 1
# is an error since R 4.3.0, so the example uses scalars.
TRUE && TRUE
## [1] TRUE

Factor

# Build a factor holding "m" whose allowed levels are "m" and "f".
sex <- factor("m", levels = c("m", "f"))
sex
## [1] m
## Levels: m f
nlevels(sex) # number of levels
## [1] 2
levels(sex) # all level labels, in level order
## [1] "m" "f"
levels(sex)[1]
## [1] "m"
levels(sex)[2]
## [1] "f"
# Assigning to levels() relabels the levels position by position.
levels(sex) <- c("male", "female")
sex
## [1] male
## Levels: male female

# Ordered factors: without explicit `levels`, R sorts the labels
# alphabetically, which would give the meaningless order bad < good < normal.
# Supplying `levels` from lowest to highest makes comparisons meaningful.
ordered(c("good", "normal", "bad"), levels = c("bad", "normal", "good"))
## [1] good   normal bad   
## Levels: bad < normal < good
# factor(..., ordered = TRUE) is the equivalent spelling.
factor(c("good", "normal", "bad"), levels = c("bad", "normal", "good"), ordered = TRUE)
## [1] good   normal bad   
## Levels: bad < normal < good

Vector

벡터에는 한 가지 유형의 스칼라만 저장할 수 있다.

# Different ways to build a vector; each assignment overwrites x.
x <- c(1, 2, 3)
x <- 1:10                            # [1] 1 2 3 4 5 6 7 8 9 10
x <- seq(from = 1, to = 10, by = 2)  # [1] 1 3 5 7 9
x <- rep(1:2, times = 5)             # [1] 1 2 1 2 1 2 1 2 1 2
x <- rep(1:2, each = 5)              # [1] 1 1 1 1 1 2 2 2 2 2

seq_along(c("a", "b", "c")) # returns 1, 2, ..., N where N is the length of the given data
## [1] 1 2 3
seq_len(3) # returns 1, 2, ..., N for the N passed as the argument
## [1] 1 2 3
x <- seq_len(5)
x
## [1] 1 2 3 4 5

# A numeric vector whose elements are labelled via names().
x <- c(1, 3, 4, 5)
names(x) <- c("kim", "seo", "park", "choi")
x[1] # positional index
## kim 
##   1
x[-1] # negative index: every element except kim
##  seo park choi 
##    3    4    5
x[c(1, 3)] # several positions at once
##  kim park 
##    1    4
x[1:3] # a range of positions
##  kim  seo park 
##    1    3    4
x["seo"] # index by name
## seo 
##   3
x[c("park", "choi")] # several names at once
## park choi 
##    4    5
names(x)[2]
## [1] "seo"
length(x)
## [1] 4
nrow(x) # nrow() only works on matrices; a plain vector gives NULL
## NULL
NROW(x) # NROW() works on both vectors and matrices
## [1] 4

Vector Operator

# Membership test: is the left-hand value present in the right-hand vector?
"a" %in% c("a", "b", "c")
## [1] TRUE
# Elements of the first vector that are not in the second.
setdiff(c("a", "b", "c"), c("a", "d"))
## [1] "b" "c"
# Elements found in either vector, without duplicates.
union(c("a", "b", "c"), c("a", "d"))
## [1] "a" "b" "c" "d"
# Elements found in both vectors.
intersect(c("a", "b", "c"), c("a", "d"))
## [1] "a"
# Set comparison: TRUE only when both vectors hold the same set of elements.
setequal(c("a", "b", "c"), c("a", "d"))
## [1] FALSE

List

# A list can hold elements of different types under different names.
x <- list(name = "foo", height = 70)
x
## $name
## [1] "foo"
## 
## $height
## [1] 70
# An element may itself be a vector.
x <- list(name = "foo", height = c(1, 3, 5))
x
## $name
## [1] "foo"
## 
## $height
## [1] 1 3 5
x$name
## [1] "foo"
x$height
## [1] 1 3 5
x[1] # single bracket: returns a list containing the (name, "foo") pair
## $name
## [1] "foo"
x[[1]] # double bracket: returns the value itself
## [1] "foo"
x[2]
## $height
## [1] 1 3 5
x[[2]]
## [1] 1 3 5

Matrix

행렬에는 한 가지 유형의 스칼라만 저장할 수 있다.

# By default the values fill the matrix column by column.
matrix(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12), nrow = 3)
##      [,1] [,2] [,3] [,4]
## [1,]    1    4    7   10
## [2,]    2    5    8   11
## [3,]    3    6    9   12
matrix(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12), ncol = 3)
##      [,1] [,2] [,3]
## [1,]    1    5    9
## [2,]    2    6   10
## [3,]    3    7   11
## [4,]    4    8   12
# byrow = TRUE fills row by row instead.
matrix(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12), nrow = 3, byrow = TRUE)
##      [,1] [,2] [,3] [,4]
## [1,]    1    2    3    4
## [2,]    5    6    7    8
## [3,]    9   10   11   12

# dimnames takes list(row names, column names).
matrix(
  c(1, 2, 3, 4, 5, 6),
  nrow = 3,
  dimnames = list(c("item1", "item2", "item3"), c("A", "B"))
)
##       A B
## item1 1 4
## item2 2 5
## item3 3 6

# A 3 x 4 matrix with named rows and columns, used for the indexing examples.
x <- matrix(
  c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12),
  nrow = 3,
  dimnames = list(c("item1", "item2", "item3"), c("A", "B", "C", "D"))
)
x
##       A B C  D
## item1 1 4 7 10
## item2 2 5 8 11
## item3 3 6 9 12
x[1, 2] # single cell: row 1, column 2
## [1] 4
x[1:2, ] # rows 1 to 2, all columns
##       A B C  D
## item1 1 4 7 10
## item2 2 5 8 11
x[-2, ] # every row except row 2
##       A B C  D
## item1 1 4 7 10
## item3 3 6 9 12
x[2, 1:2] # a single row comes back as a named vector
## A B 
## 2 5
x[c(1, 3), c(1, 3)]
##       A C
## item1 1 7
## item3 3 9
x["item1", ] # rows can also be selected by name
##  A  B  C  D 
##  1  4  7 10
x[c("item1", "item3"), ]
##       A B C  D
## item1 1 4 7 10
## item3 3 6 9 12

Matrix Operator

# A 3 x 3 matrix used for the arithmetic examples.
x <- matrix(c(1, 2, 3, 4, 5, 6, 7, 8, 9), nrow = 3)
x
##      [,1] [,2] [,3]
## [1,]    1    4    7
## [2,]    2    5    8
## [3,]    3    6    9
x * 2 # scalar multiplication, applied to every element
##      [,1] [,2] [,3]
## [1,]    2    8   14
## [2,]    4   10   16
## [3,]    6   12   18
x + x # element-wise addition
##      [,1] [,2] [,3]
## [1,]    2    8   14
## [2,]    4   10   16
## [3,]    6   12   18
x %*% x # matrix product
##      [,1] [,2] [,3]
## [1,]   30   66  102
## [2,]   36   81  126
## [3,]   42   96  150
t(x) # transpose
##      [,1] [,2] [,3]
## [1,]    1    2    3
## [2,]    4    5    6
## [3,]    7    8    9
ncol(x)
## [1] 3
nrow(x)
## [1] 3

역행렬

# solve() with a single square matrix argument returns its inverse.
x <- matrix(c(1, 2, 3, 4), nrow = 2)
x
##      [,1] [,2]
## [1,]    1    3
## [2,]    2    4
solve(x)
##      [,1] [,2]
## [1,]   -2  1.5
## [2,]    1 -0.5

Array

# A two-dimensional array prints the same as the equivalent matrix.
matrix(1:6, ncol = 3)
##      [,1] [,2] [,3]
## [1,]    1    3    5
## [2,]    2    4    6
array(1:6, dim = c(2, 3))
##      [,1] [,2] [,3]
## [1,]    1    3    5
## [2,]    2    4    6
# Three dimensions: 2 rows x 2 columns x 3 slices.
x <- array(seq_len(12), dim = c(2, 2, 3))
x[1, 2, 3] # row 1, column 2 of the third slice
## [1] 11
x[, , 3] # the whole third slice
##      [,1] [,2]
## [1,]    9   11
## [2,]   10   12
dim(x)
## [1] 2 2 3

Data Frame

# Build a small example data frame. stringsAsFactors = TRUE is spelled out
# because the outputs below show `name` and `zender` as factors; since
# R 4.0.0 data.frame() keeps strings as character unless this is requested.
# NOTE(review): "zender" looks like a typo for "gender"; the name is kept
# because the column is selected by this name elsewhere in the file.
d <- data.frame(
  name = c("choi", "park", "lee", "kim", "moon"),
  age = c(49, 32, 16, 35, 55),
  zender = c("M", "F", "M", "F", "M"),
  stringsAsFactors = TRUE
)
d
##   name age zender
## 1 choi  49      M
## 2 park  32      F
## 3  lee  16      M
## 4  kim  35      F
## 5 moon  55      M
d$ex <- c(3, 6, 9, 12, 15) # add a new column
d
##   name age zender ex
## 1 choi  49      M  3
## 2 park  32      F  6
## 3  lee  16      M  9
## 4  kim  35      F 12
## 5 moon  55      M 15
d$x # a column that does not exist yields NULL rather than an error
## NULL
d[1, ] # first row, all columns
##   name age zender ex
## 1 choi  49      M  3
d[1, 2] # single cell: row 1, column 2
## [1] 49
d[c(1, 3), 3] # a single column is simplified to a vector (here a factor)
## [1] M M
## Levels: F M
d[c(1, 3), 3, drop = FALSE] # drop = FALSE keeps the result a data frame
##   zender
## 1      M
## 3      M
d[-1, ] # every row except the first
##   name age zender ex
## 2 park  32      F  6
## 3  lee  16      M  9
## 4  kim  35      F 12
## 5 moon  55      M 15
d[, -3] # every column except the third
##   name age ex
## 1 choi  49  3
## 2 park  32  6
## 3  lee  16  9
## 4  kim  35 12
## 5 moon  55 15
d[-5, -c(1, 3)] # drop row 5 and columns 1 and 3
##   age ex
## 1  49  3
## 2  32  6
## 3  16  9
## 4  35 12
d[, c("name", "ex")] # select columns by name
##   name ex
## 1 choi  3
## 2 park  6
## 3  lee  9
## 4  kim 12
## 5 moon 15

str(d) # compact summary of the structure and column types
## 'data.frame':    5 obs. of  4 variables:
##  $ name  : Factor w/ 5 levels "choi","kim","lee",..: 1 5 3 2 4
##  $ age   : num  49 32 16 35 55
##  $ zender: Factor w/ 2 levels "F","M": 2 1 2 1 2
##  $ ex    : num  3 6 9 12 15
head(d, 3) # first three rows
##   name age zender ex
## 1 choi  49      M  3
## 2 park  32      F  6
## 3  lee  16      M  9

Data Frame : colnames(), rownames()

# Unnamed arguments get automatically generated column names derived from
# the expression that was passed in.
x <- data.frame(1:3)
x
##   X1.3
## 1    1
## 2    2
## 3    3
# Three separate scalars become three columns in a single row.
x <- data.frame(50, 35, 22)
x
##   X50 X35 X22
## 1  50  35  22
# One vector becomes one column.
x <- data.frame(c(50, 35, 22))
x
##   c.50..35..22.
## 1            50
## 2            35
## 3            22
x <- data.frame(c(50, 35, 22, 16))
x
##   c.50..35..22..16.
## 1                50
## 2                35
## 3                22
## 4                16
# Replace the generated column name with a readable one.
colnames(x) <- "age"
x
##   age
## 1  50
## 2  35
## 3  22
## 4  16
# Rows can be labelled as well.
rownames(x) <- c("choi", "park", "lee", "kim")
x
##      age
## choi  50
## park  35
## lee   22
## kim   16

Data Frame : %in%

# `d` is the example data frame built in the Data Frame section.
d
##   name age zender ex
## 1 choi  49      M  3
## 2 park  32      F  6
## 3  lee  16      M  9
## 4  kim  35      F 12
## 5 moon  55      M 15
# names(d) %in% ... gives a logical mask over the columns; keep the matches.
d[, names(d) %in% c("name", "zender")]
##   name zender
## 1 choi      M
## 2 park      F
## 3  lee      M
## 4  kim      F
## 5 moon      M
# Negating the mask keeps every column except the listed one.
d[, !(names(d) %in% "name")]
##   age zender ex
## 1  49      M  3
## 2  32      F  6
## 3  16      M  9
## 4  35      F 12
## 5  55      M 15

타입 판별

# class() reports the class attribute of an object.
class(c(1, 2)) # [1] "numeric"
# Since R 4.0.0 a matrix also inherits from "array", so class() returns two
# values; earlier versions returned only "matrix".
class(matrix(c(1, 2))) # [1] "matrix" "array"
class(list(c(1, 2))) # [1] "list"
class(data.frame(x = c(1, 2))) # [1] "data.frame"
class(c("a", "b")) # [1] "character"
# str() shows the type together with the dimensions and first values.
str(c(1, 2))
##  num [1:2] 1 2
str(matrix(c(1, 2)))
##  num [1:2, 1] 1 2
str(list(c(1, 2)))
## List of 1
##  $ : num [1:2] 1 2
str(data.frame(x = c(1, 2)))
## 'data.frame':    2 obs. of  1 variable:
##  $ x: num  1 2
str(c("a", "b"))
##  chr [1:2] "a" "b"
# The is.*() predicates answer a yes/no question about the type.
is.numeric(c(1, 2, 3)) # [1] TRUE
is.numeric(c("A", "B")) # [1] FALSE
is.matrix(matrix(c(1, 2))) # [1] TRUE

타입 변환 : as.factor(), as.numeric(), as.data.frame(), as.matrix()

# Start from a one-column matrix and convert it to a data frame.
x <- matrix(1:4)
class(x) # since R 4.0.0 a matrix also inherits from "array"
## [1] "matrix" "array"
x <- data.frame(x)
class(x)
## [1] "data.frame"
# Character vector to factor.
x <- c("m", "f")
class(x)
## [1] "character"
x <- as.factor(x)
class(x)
## [1] "factor"
x # as.factor() sorts the levels alphabetically
## [1] m f
## Levels: f m
factor(x, levels = c("m", "f")) # pass levels explicitly to control their order
## [1] m f
## Levels: m f