R 프로그래밍 기초

R 프로그래밍 기초, 따라하며 배우기

R의 기본을 되짚어 보려고 찾아본 강의 중 최고는 곽기영 교수님의 유튜브 강의입니다. R 프로그래밍 / R 기초 by 곽기영 on Youtube (우클릭 새창으로 여세요) 확실한 기본을 잡고자 한다면 강의를 보며 따라하기를 강력 추천합니다! 여기는 따라하며 해 본 것들을 한 번에 훑어보기 위한 기록입니다.

이 노트 관련 문의는 제이스’s 블로그에 댓글로 부탁드립니다.

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see https://rmarkdown.rstudio.com.

벡터, 팩터

c(), seq(), rep(), str(), length()

c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)

##  [1]  1  2  3  4  5  6  7  8  9 10

c("we", "love", "data", "analytics")

## [1] "we"        "love"      "data"      "analytics"

c(TRUE, FALSE, TRUE, FALSE)

## [1]  TRUE FALSE  TRUE FALSE

odd <- c(1, 3, 5)
even <- c(2, 4, 6)
odd

## [1] 1 3 5

even

## [1] 2 4 6

c(odd, even)

## [1] 1 3 5 2 4 6

3:9

## [1] 3 4 5 6 7 8 9

9:3

## [1] 9 8 7 6 5 4 3

5:-3

## [1]  5  4  3  2  1  0 -1 -2 -3

seq(from = 3, to = 9)

## [1] 3 4 5 6 7 8 9

seq(from = 3, to = 9, by = 2)

## [1] 3 5 7 9

seq(from = 1.5, to = 1.5, by = -0.5)

## [1] 1.5

seq(from = 0, to = 100, length.out = 5)

## [1]   0  25  50  75 100

seq(from = -1, to = 1, length.out = 5)

## [1] -1.0 -0.5  0.0  0.5  1.0

rep(1, times = 3)

## [1] 1 1 1

rep(c(1, 2, 3), times = 3)

## [1] 1 2 3 1 2 3 1 2 3

rep(c(1, 2, 3), each = 3)

## [1] 1 1 1 2 2 2 3 3 3

rep(c(1, 2, 3), times = c(1, 2, 3))

## [1] 1 2 2 3 3 3

rep(c(1, 2, 3), length.out = 8)

## [1] 1 2 3 1 2 3 1 2

num <- c(1, 2, 3)
cha <- c("x", "y", "z")
c(num, cha)

## [1] "1" "2" "3" "x" "y" "z"

str(num)

##  num [1:3] 1 2 3

str(cha)

##  chr [1:3] "x" "y" "z"

length(num)

## [1] 3

LETTERS

##  [1] "A" "B" "C" "D" "E" "F" "G" "H" "I" "J" "K" "L" "M" "N" "O" "P" "Q" "R" "S"
## [20] "T" "U" "V" "W" "X" "Y" "Z"

letters

##  [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o" "p" "q" "r" "s"
## [20] "t" "u" "v" "w" "x" "y" "z"

month.name

##  [1] "January"   "February"  "March"     "April"     "May"       "June"     
##  [7] "July"      "August"    "September" "October"   "November"  "December"

month.abb

##  [1] "Jan" "Feb" "Mar" "Apr" "May" "Jun" "Jul" "Aug" "Sep" "Oct" "Nov" "Dec"

pi

## [1] 3.141593

month <- c(12, 9, 3, 5, 1)
month

## [1] 12  9  3  5  1

month.name[month]

## [1] "December"  "September" "March"     "May"       "January"

연산자

1 + 2

## [1] 3

"+"(1, 2)

## [1] 3

c(1, 2, 3) + c(4, 5, 6)

## [1] 5 7 9

c(1, 2, 3) * c(4, 5, 6)

## [1]  4 10 18

c(10, 20, 30) / c(2, 4, 6)

## [1] 5 5 5

c(10, 20, 30) %% c(3, 5, 7)

## [1] 1 0 2

c(10, 20, 30) %/% c(3, 5, 7)

## [1] 3 4 4

c(1, 2, 3) + c(4, 5, 6, 7, 8, 9)

## [1]  5  7  9  8 10 12

c(1, 2, 3, 1, 2, 3) + c(4, 5, 6, 7, 8, 9)

## [1]  5  7  9  8 10 12

c(1, 3, 5) + 10

## [1] 11 13 15

c(1, 2, 3) + c(4, 5, 6, 7, 8)

## Warning in c(1, 2, 3) + c(4, 5, 6, 7, 8): 두 객체의 길이가 서로 배수관계에 있지
## 않습니다

## [1]  5  7  9  8 10

v <- pi
w <- 10 / 3

v == w

## [1] FALSE

v != w

## [1] TRUE

v > w

## [1] FALSE

v < w

## [1] TRUE

!(v > w)

## [1] TRUE

(v == w) | (v < w)

## [1] TRUE

(v == w) & (v < w)

## [1] FALSE

isTRUE(v == w)

## [1] FALSE

y <- c(0, 25, 50, 75, 100)
z <- c(50, 50, 50, 50, 50)
y == z

## [1] FALSE FALSE  TRUE FALSE FALSE

y != z

## [1]  TRUE  TRUE FALSE  TRUE  TRUE

y > z

## [1] FALSE FALSE FALSE  TRUE  TRUE

y < z

## [1]  TRUE  TRUE FALSE FALSE FALSE

y == 50

## [1] FALSE FALSE  TRUE FALSE FALSE

y > 50

## [1] FALSE FALSE FALSE  TRUE  TRUE

as.numeric(TRUE)

## [1] 1

as.numeric(FALSE)

## [1] 0

TRUE * TRUE

## [1] 1

TRUE * FALSE

## [1] 0

TRUE + TRUE

## [1] 2

y <-  c(0, 25, 50, 75, 100)
y > 50

## [1] FALSE FALSE FALSE  TRUE  TRUE

sum(y > 50)

## [1] 2

any(-3:3 > 0)

## [1] TRUE

all(-3:3 > 0)

## [1] FALSE

sqrt(2) ^ 2 == 2

## [1] FALSE

sqrt(2) ^ 2 - 2

## [1] 4.440892e-16

identical(sqrt(2) ^ 2 , 2)

## [1] FALSE

all.equal(sqrt(2) ^ 2 , 2)

## [1] TRUE

all.equal(sqrt(2) ^ 2, 3)

## [1] "Mean relative difference: 0.5"

isTRUE(all.equal(sqrt(2) ^ 2, 3))

## [1] FALSE

fruit <- c("Apple", "Banana", "Strawberry")
food <- c("Pie", "Juice", "Cake")
paste(fruit, food)

## [1] "Apple Pie"       "Banana Juice"    "Strawberry Cake"

paste(fruit, "Juice")

## [1] "Apple Juice"      "Banana Juice"     "Strawberry Juice"

벡터 함수

abs(-3:3)

## [1] 3 2 1 0 1 2 3

log(1:5) # 자연로그

## [1] 0.0000000 0.6931472 1.0986123 1.3862944 1.6094379

log(1:5, base = exp(1))

## [1] 0.0000000 0.6931472 1.0986123 1.3862944 1.6094379

log2(1:5)

## [1] 0.000000 1.000000 1.584963 2.000000 2.321928

log10(1:10)

##  [1] 0.0000000 0.3010300 0.4771213 0.6020600 0.6989700 0.7781513 0.8450980
##  [8] 0.9030900 0.9542425 1.0000000

exp(1:5) # e ^ 1:5

## [1]   2.718282   7.389056  20.085537  54.598150 148.413159

y <- exp(1:5)
y

## [1]   2.718282   7.389056  20.085537  54.598150 148.413159

log(y)

## [1] 1 2 3 4 5

factorial(1:5)

## [1]   1   2   6  24 120

choose(5, 2) # 조합

## [1] 10

sqrt(1:5)

## [1] 1.000000 1.414214 1.732051 2.000000 2.236068

options("digits") # 유효자리수

## $digits
## [1] 7

pi

## [1] 3.141593

pi * 100

## [1] 314.1593

signif(456.789, digits = 2) # 유효숫자 자리수에 맞춰 반올림

## [1] 460

signif(456.789, digits = 3)

## [1] 457

signif(456.789, digits = 4)

## [1] 456.8

round(456.789, digits = 2) # 반올림

## [1] 456.79

round(456.789, digits = 1)

## [1] 456.8

round(456.789)

## [1] 457

round(sqrt(1:5), digits = 2)

## [1] 1.00 1.41 1.73 2.00 2.24

round(456.789, digits = -2)

## [1] 500

round(456.789, digits = -1)

## [1] 460

round(11.5) # 가까운 짝수

## [1] 12

round(10.5)

## [1] 10

round(12.5)

## [1] 12

round(13.5)

## [1] 14

round(-3.5)

## [1] -4

round(-4.5)

## [1] -4

floor(456.789) # 내림

## [1] 456

floor(-456.789) # 내림

## [1] -457

ceiling(456.789) # 올림

## [1] 457

ceiling(-456.789) # 올림

## [1] -456

trunc(456.789) # 0에 가까운 정수

## [1] 456

trunc(-456.789) # 0에 가까운 정수

## [1] -456

3 / 0

## [1] Inf

5 - Inf

## [1] -Inf

Inf * Inf # 약 1.8 * 10 ^ 308 을 넘는 값은 Inf 로 표현

## [1] Inf

Inf * -Inf

## [1] -Inf

is.infinite(10 ^ (305:310))

## [1] FALSE FALSE FALSE FALSE  TRUE  TRUE

Inf / Inf

## [1] NaN

Inf * 0

## [1] NaN

log(-2)

## Warning in log(-2): NaN이 생성되었습니다

## [1] NaN

NaN + 3 # Not a Number

## [1] NaN

is.nan(NaN + 3)

## [1] TRUE

k <- NA # Not Available
k

## [1] NA

k + 5

## [1] NA

sqrt(k)

## [1] NA

is.na(k)

## [1] TRUE

is.na(k + 5)

## [1] TRUE

is.na(NaN)

## [1] TRUE

z <- 1:5
z

## [1] 1 2 3 4 5

sum(z)

## [1] 15

prod(z)

## [1] 120

max(z)

## [1] 5

min(z)

## [1] 1

w <- c(1, 2, 3, 4, 5, NA)
sum(w, na.rm = TRUE)

## [1] 15

na.omit(w)

## [1] 1 2 3 4 5
## attr(,"na.action")
## [1] 6
## attr(,"class")
## [1] "omit"

sum(na.omit(w))

## [1] 15

v <- c(NA, NA, NA, NA, NA)
v

## [1] NA NA NA NA NA

sum(v, na.rm = TRUE)

## [1] 0

prod(v, na.rm = TRUE)

## [1] 1

max(v, na.rm = TRUE)

## Warning in max(v, na.rm = TRUE): max에 전달되는 인자들 중 누락이 있어 -Inf를 반
## 환합니다

## [1] -Inf

min(v, na.rm = TRUE)

## Warning in min(v, na.rm = TRUE): min에 전달되는 인자들 중 누락이 있어 Inf를 반환
## 합니다

## [1] Inf

traffic.death <- c(842, 729, 786, 751, 844, 851, 702)
cumsum(traffic.death) # 누적

## [1]  842 1571 2357 3108 3952 4803 5505

cumprod(traffic.death)

## [1] 8.420000e+02 6.138180e+05 4.824609e+08 3.623282e+11 3.058050e+14
## [6] 2.602400e+17 1.826885e+20

cummax(traffic.death)

## [1] 842 842 842 842 844 851 851

cummin(traffic.death)

## [1] 842 729 729 729 729 729 702

cumsum(c(3, 5, 1, NA, 2))

## [1]  3  8  9 NA NA

traffic.death

## [1] 842 729 786 751 844 851 702

diff(traffic.death)

## [1] -113   57  -35   93    7 -149

diff(c(3, 5, 1, NA, 2))

## [1]  2 -4 NA NA

diff(1:5, lag = 2) # 3-1, 4-2, 5-3

## [1] 2 2 2

p <- 1:10
q <- 6:15
union(p, q) # 합집합

##  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15

intersect(p, q) # 교집합

## [1]  6  7  8  9 10

setdiff(p, q) # 차집합

## [1] 1 2 3 4 5

setequal(p, q)

## [1] FALSE

is.element(setdiff(p, q), p) # 포함, 부분집합

## [1] TRUE TRUE TRUE TRUE TRUE

is.element(setdiff(p, q), q) # 포함, 부분집합

## [1] FALSE FALSE FALSE FALSE FALSE

벡터 인덱싱

num <- 0:30
num

##  [1]  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
## [26] 25 26 27 28 29 30

prime <- c(2, 3, 5, 7, 11, 13, 17, 19)
prime

## [1]  2  3  5  7 11 13 17 19

prime[1]

## [1] 2

prime[2]

## [1] 3

prime[1:3]

## [1] 2 3 5

prime[4:6]

## [1]  7 11 13

prime[c(1, 1, 5, 5)]

## [1]  2  2 11 11

prime[c(1, 3, 5, 7)]

## [1]  2  5 11 17

prime[c(7, 5, 3, 1)]

## [1] 17 11  5  2

indices <- c(1, 3, 5, 7)
prime[indices]

## [1]  2  5 11 17

prime[-1]

## [1]  3  5  7 11 13 17 19

prime[-1:-3]

## [1]  7 11 13 17 19

prime[-(1:3)]

## [1]  7 11 13 17 19

length(prime)

## [1] 8

prime[1:length(prime) - 1]

## [1]  2  3  5  7 11 13 17

prime[-length(prime)]

## [1]  2  3  5  7 11 13 17

prime <- c(2, 4, 5, 7, 11, 14, 17, 18)
prime

## [1]  2  4  5  7 11 14 17 18

prime[2] <- 3
prime

## [1]  2  3  5  7 11 14 17 18

prime[c(6, 8)] <- c(13, 19)
prime

## [1]  2  3  5  7 11 13 17 19

prime <- c(2, 3, 5, 7, 11, 13, 17, 19)
prime

## [1]  2  3  5  7 11 13 17 19

length(prime)

## [1] 8

prime[9] <- 23
prime

## [1]  2  3  5  7 11 13 17 19 23

prime[c(10, 11)] <- c(29, 31)
prime

##  [1]  2  3  5  7 11 13 17 19 23 29 31

prime[15] <- 47
prime

##  [1]  2  3  5  7 11 13 17 19 23 29 31 NA NA NA 47

prime <- c(2, 3, 5, 7, 11, 13, 17, 19)
prime < 10

## [1]  TRUE  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE

prime[prime < 10]

## [1] 2 3 5 7

prime %% 2 == 0

## [1]  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE

prime[prime %% 2 == 0]

## [1] 2

seq_along(), which(), names()

seq_along(prime)

## [1] 1 2 3 4 5 6 7 8

seq_along(prime) %% 2 == 0

## [1] FALSE  TRUE FALSE  TRUE FALSE  TRUE FALSE  TRUE

prime[seq_along(prime) %% 2 == 0]

## [1]  3  7 13 19

prime[seq_along(prime) %% 3 == 0]

## [1]  5 13

prime[c(FALSE, TRUE)]

## [1]  3  7 13 19

prime[c(FALSE, FALSE, TRUE)]

## [1]  5 13

rainfall <- c(21.6, 23.6, 45.8, 77.0, 102.2, 133.3, 327.9, 348.0,
              137.6, 49.3,53.0, 24.9)
rainfall

##  [1]  21.6  23.6  45.8  77.0 102.2 133.3 327.9 348.0 137.6  49.3  53.0  24.9

rainfall > 100

##  [1] FALSE FALSE FALSE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE FALSE

which(rainfall > 100)

## [1] 5 6 7 8 9

month.name[which(rainfall > 100)]

## [1] "May"       "June"      "July"      "August"    "September"

month.abb[which(rainfall > 100)]

## [1] "May" "Jun" "Jul" "Aug" "Sep"

which.max(rainfall)

## [1] 8

month.name[which.max(rainfall)]

## [1] "August"

month.name[which.min(rainfall)]

## [1] "January"

rainfall > 100

##  [1] FALSE FALSE FALSE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE FALSE

rainfall[rainfall > 100]

## [1] 102.2 133.3 327.9 348.0 137.6

rainfall[which.min(rainfall)]

## [1] 21.6

rainfall[which.max(rainfall)]

## [1] 348

traffic.death <- c(842, 729, 786, 751, 844, 851, 702)
traffic.death

## [1] 842 729 786 751 844 851 702

names(traffic.death) <- c("Mon", "Tue", "Wed", "Thu",
                          "Fri", "Sat", "Sun")
traffic.death

## Mon Tue Wed Thu Fri Sat Sun 
## 842 729 786 751 844 851 702

traffic.death["Sat"]

## Sat 
## 851

traffic.death[c("Tue", "Thu", "Sun")]

## Tue Thu Sun 
## 729 751 702

weekend <- c("Fri", "Sat", "Sun")
traffic.death[weekend]

## Fri Sat Sun 
## 844 851 702

traffic.death > 800

##   Mon   Tue   Wed   Thu   Fri   Sat   Sun 
##  TRUE FALSE FALSE FALSE  TRUE  TRUE FALSE

traffic.death[traffic.death > 800]

## Mon Fri Sat 
## 842 844 851

names(traffic.death[traffic.death > 800])

## [1] "Mon" "Fri" "Sat"

팩터

review <- c("Good", "Good", "Indifferent", "Bad", "Good", "Bad")
review

## [1] "Good"        "Good"        "Indifferent" "Bad"         "Good"       
## [6] "Bad"

review.factor <- factor(review)
review.factor

## [1] Good        Good        Indifferent Bad         Good        Bad        
## Levels: Bad Good Indifferent

review

## [1] "Good"        "Good"        "Indifferent" "Bad"         "Good"       
## [6] "Bad"

str(review)

##  chr [1:6] "Good" "Good" "Indifferent" "Bad" "Good" "Bad"

str(review.factor)

##  Factor w/ 3 levels "Bad","Good","Indifferent": 2 2 3 1 2 1

as.numeric(review.factor)

## [1] 2 2 3 1 2 1

eventday <- c("Mon", "Mon", "Tue", "Wed", "Mon",
              "Wed", "Thu", "Fri", "Tue")
eventday.factor <- factor(eventday)
eventday.factor

## [1] Mon Mon Tue Wed Mon Wed Thu Fri Tue
## Levels: Fri Mon Thu Tue Wed

eventday.factor <- factor(eventday,
                          levels = c("Mon", "Tue", "Wed", "Thu",
                                     "Fri", "Sat", "Sun"))
eventday.factor

## [1] Mon Mon Tue Wed Mon Wed Thu Fri Tue
## Levels: Mon Tue Wed Thu Fri Sat Sun

levels(review.factor)

## [1] "Bad"         "Good"        "Indifferent"

levels(review.factor) <- c("B", "G", "I")
levels(review.factor)

## [1] "B" "G" "I"

review.factor

## [1] G G I B G B
## Levels: B G I

nlevels(review.factor)

## [1] 3

length(levels(review.factor))

## [1] 3

eval <- c("Medium", "Low", "High", "Medium", "High")
eval.factor <- factor(eval)
eval.factor

## [1] Medium Low    High   Medium High  
## Levels: High Low Medium

eval.ordered <- factor(eval, levels = c("Low", "Medium", "High"),
                       ordered = TRUE)
eval.ordered

## [1] Medium Low    High   Medium High  
## Levels: Low < Medium < High

table(eval.factor)

## eval.factor
##   High    Low Medium 
##      2      1      2

table(eval.ordered)

## eval.ordered
##    Low Medium   High 
##      1      2      2

sex <- c(2, 1, 2, 2, 1, 0)
sex.factor <- factor(sex, levels = c(1, 2),
                     labels = c("Male", "Female"))
sex.factor

## [1] Female Male   Female Female Male   <NA>  
## Levels: Male Female

table(sex.factor)

## sex.factor
##   Male Female 
##      2      3

행렬, 배열

dim(), matrix()

v <- 1:12
v

##  [1]  1  2  3  4  5  6  7  8  9 10 11 12

dim(v) <- c(3, 4)
v

##      [,1] [,2] [,3] [,4]
## [1,]    1    4    7   10
## [2,]    2    5    8   11
## [3,]    3    6    9   12

v <- 1:12
matrix(data = v, nrow = 3, ncol = 4)

##      [,1] [,2] [,3] [,4]
## [1,]    1    4    7   10
## [2,]    2    5    8   11
## [3,]    3    6    9   12

matrix(data = v, nrow = 3, ncol = 4, byrow = TRUE)

##      [,1] [,2] [,3] [,4]
## [1,]    1    2    3    4
## [2,]    5    6    7    8
## [3,]    9   10   11   12

rnames <- c("R1", "R2", "R3")
colnames <- c("C1", "C2", "C3", "C4")
matrix(data = v, nrow = 3, ncol = 4,
       dimnames = list(rnames, colnames))

##    C1 C2 C3 C4
## R1  1  4  7 10
## R2  2  5  8 11
## R3  3  6  9 12

matrix(0, 3, 4)

##      [,1] [,2] [,3] [,4]
## [1,]    0    0    0    0
## [2,]    0    0    0    0
## [3,]    0    0    0    0

matrix(NA, 3, 4)

##      [,1] [,2] [,3] [,4]
## [1,]   NA   NA   NA   NA
## [2,]   NA   NA   NA   NA
## [3,]   NA   NA   NA   NA

mat <- matrix(v, ncol = 4)
mat

##      [,1] [,2] [,3] [,4]
## [1,]    1    4    7   10
## [2,]    2    5    8   11
## [3,]    3    6    9   12

str(mat)

##  int [1:3, 1:4] 1 2 3 4 5 6 7 8 9 10 ...

dim(mat)

## [1] 3 4

dim(mat)[1]

## [1] 3

dim(mat)[2]

## [1] 4

nrow(mat)

## [1] 3

ncol(mat)

## [1] 4

length(mat)

## [1] 12

rbind(), cbind()

v1 <- c(1, 2, 3, 4, 5)
v2 <- c(5, 7, 8, 9, 10)
rbind(v1, v2)

##    [,1] [,2] [,3] [,4] [,5]
## v1    1    2    3    4    5
## v2    5    7    8    9   10

cbind(v1, v2)

##      v1 v2
## [1,]  1  5
## [2,]  2  7
## [3,]  3  8
## [4,]  4  9
## [5,]  5 10

cbind(1:3, 4:6, matrix(7:12, 3, 2))

##      [,1] [,2] [,3] [,4]
## [1,]    1    4    7   10
## [2,]    2    5    8   11
## [3,]    3    6    9   12

rbind(matrix(1:6, 2, 3), matrix(7:12, 2, 3))

##      [,1] [,2] [,3]
## [1,]    1    3    5
## [2,]    2    4    6
## [3,]    7    9   11
## [4,]    8   10   12

연산

mtx <- matrix(1:6, 2, 3)
mtx

##      [,1] [,2] [,3]
## [1,]    1    3    5
## [2,]    2    4    6

mtx + 1

##      [,1] [,2] [,3]
## [1,]    2    4    6
## [2,]    3    5    7

mtx - 1

##      [,1] [,2] [,3]
## [1,]    0    2    4
## [2,]    1    3    5

mtx * 2

##      [,1] [,2] [,3]
## [1,]    2    6   10
## [2,]    4    8   12

mtx / 2

##      [,1] [,2] [,3]
## [1,]  0.5  1.5  2.5
## [2,]  1.0  2.0  3.0

a <- matrix(1:6, 2, 3)
b <- matrix(6:1, 2, 3)
a + b

##      [,1] [,2] [,3]
## [1,]    7    7    7
## [2,]    7    7    7

a - b

##      [,1] [,2] [,3]
## [1,]   -5   -1    3
## [2,]   -3    1    5

a * b

##      [,1] [,2] [,3]
## [1,]    6   12   10
## [2,]   10   12    6

a / b

##           [,1]     [,2] [,3]
## [1,] 0.1666667 0.750000  2.5
## [2,] 0.4000000 1.333333  6.0

c <- matrix(6:1, 3, 2)
c

##      [,1] [,2]
## [1,]    6    3
## [2,]    5    2
## [3,]    4    1

##      [,1] [,2] [,3]
## [1,]    1    3    5
## [2,]    2    4    6

# a + c
a * b

##      [,1] [,2] [,3]
## [1,]    6   12   10
## [2,]   10   12    6

a <- matrix(1:6, 2, 3)
a

##      [,1] [,2] [,3]
## [1,]    1    3    5
## [2,]    2    4    6

b <- matrix(1:6, 3, 2)
b

##      [,1] [,2]
## [1,]    1    4
## [2,]    2    5
## [3,]    3    6

a %*% b

##      [,1] [,2]
## [1,]   22   49
## [2,]   28   64

c <- matrix(1:6, 2, 3)

mtx <- matrix(1:6, 2, 3)
mtx

##      [,1] [,2] [,3]
## [1,]    1    3    5
## [2,]    2    4    6

mtx %*% 1:3

##      [,1]
## [1,]   22
## [2,]   28

#mtx %*% 1:2
1:2 %*% mtx

##      [,1] [,2] [,3]
## [1,]    5   11   17

mtx <- matrix(1:6, 2, 3)
mtx

##      [,1] [,2] [,3]
## [1,]    1    3    5
## [2,]    2    4    6

mtx + 1:3

##      [,1] [,2] [,3]
## [1,]    2    6    7
## [2,]    4    5    9

rowSums(), colSums(), rowMeans(), colMeans(), t()

mtx

##      [,1] [,2] [,3]
## [1,]    1    3    5
## [2,]    2    4    6

rowSums(mtx)

## [1]  9 12

colSums(mtx)

## [1]  3  7 11

rowMeans(mtx)

## [1] 3 4

colMeans(mtx)

## [1] 1.5 3.5 5.5

t(mtx)

##      [,1] [,2]
## [1,]    1    2
## [2,]    3    4
## [3,]    5    6

1:5

## [1] 1 2 3 4 5

t(1:5)

##      [,1] [,2] [,3] [,4] [,5]
## [1,]    1    2    3    4    5

mtx

##      [,1] [,2] [,3]
## [1,]    1    3    5
## [2,]    2    4    6

mtx[2,]

## [1] 2 4 6

t(mtx[2, ])

##      [,1] [,2] [,3]
## [1,]    2    4    6

인덱싱

v <- 1:12
mat <- matrix(v, 3, 4)
mat

##      [,1] [,2] [,3] [,4]
## [1,]    1    4    7   10
## [2,]    2    5    8   11
## [3,]    3    6    9   12

str(mat)

##  int [1:3, 1:4] 1 2 3 4 5 6 7 8 9 10 ...

mat[1,]

## [1]  1  4  7 10

mat[, 3]

## [1] 7 8 9

mat[1, , drop = FALSE]

##      [,1] [,2] [,3] [,4]
## [1,]    1    4    7   10

mat[, 3, drop = FALSE]

##      [,1]
## [1,]    7
## [2,]    8
## [3,]    9

mat[2:3,]

##      [,1] [,2] [,3] [,4]
## [1,]    2    5    8   11
## [2,]    3    6    9   12

mat[, 3:4]

##      [,1] [,2]
## [1,]    7   10
## [2,]    8   11
## [3,]    9   12

mat[1:2, 2:3]

##      [,1] [,2]
## [1,]    4    7
## [2,]    5    8

mat[c(1, 3),]

##      [,1] [,2] [,3] [,4]
## [1,]    1    4    7   10
## [2,]    3    6    9   12

mat[, -c(2, 3)]

##      [,1] [,2]
## [1,]    1   10
## [2,]    2   11
## [3,]    3   12

mat

##      [,1] [,2] [,3] [,4]
## [1,]    1    4    7   10
## [2,]    2    5    8   11
## [3,]    3    6    9   12

mat[1, 3] <- 77
mat

##      [,1] [,2] [,3] [,4]
## [1,]    1    4   77   10
## [2,]    2    5    8   11
## [3,]    3    6    9   12

mat[2,] <- c(22, 55)
mat

##      [,1] [,2] [,3] [,4]
## [1,]    1    4   77   10
## [2,]   22   55   22   55
## [3,]    3    6    9   12

mat[2:3, 3:4] <- c(1, 2, 3, 4)
mat

##      [,1] [,2] [,3] [,4]
## [1,]    1    4   77   10
## [2,]   22   55    1    3
## [3,]    3    6    2    4

colnames(), rownames()

city.distance <- c(0, 331, 238, 269, 195,
                   331, 0, 95, 194, 189,
                   238, 95, 0, 171, 130,
                   269, 194, 171, 0, 77,
                   195, 189, 130, 77, 0)
city.distance

##  [1]   0 331 238 269 195 331   0  95 194 189 238  95   0 171 130 269 194 171   0
## [20]  77 195 189 130  77   0

city.distance.mat <- matrix(city.distance, 5, 5, byrow = TRUE)
city.distance.mat

##      [,1] [,2] [,3] [,4] [,5]
## [1,]    0  331  238  269  195
## [2,]  331    0   95  194  189
## [3,]  238   95    0  171  130
## [4,]  269  194  171    0   77
## [5,]  195  189  130   77    0

colnames(city.distance.mat) <- c("Seoul", "Busan", "Daegu",
                                 "Gwangju", "Jeonju")
rownames(city.distance.mat) <- c("Seoul", "Busan", "Daegu",
                                 "Gwangju", "Jeonju")
colnames(city.distance.mat)

## [1] "Seoul"   "Busan"   "Daegu"   "Gwangju" "Jeonju"

rownames(city.distance.mat)

## [1] "Seoul"   "Busan"   "Daegu"   "Gwangju" "Jeonju"

city.distance.mat

##         Seoul Busan Daegu Gwangju Jeonju
## Seoul       0   331   238     269    195
## Busan     331     0    95     194    189
## Daegu     238    95     0     171    130
## Gwangju   269   194   171       0     77
## Jeonju    195   189   130      77      0

city.distance.mat["Seoul", "Busan"]

## [1] 331

city.distance.mat[, "Seoul"]

##   Seoul   Busan   Daegu Gwangju  Jeonju 
##       0     331     238     269     195

city.distance.mat[c("Seoul", "Gwangju"),]

##         Seoul Busan Daegu Gwangju Jeonju
## Seoul       0   331   238     269    195
## Gwangju   269   194   171       0     77

배열

a <- 1:24
a

##  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24

dim(a) <- c(3, 4, 2)
a

## , , 1
## 
##      [,1] [,2] [,3] [,4]
## [1,]    1    4    7   10
## [2,]    2    5    8   11
## [3,]    3    6    9   12
## 
## , , 2
## 
##      [,1] [,2] [,3] [,4]
## [1,]   13   16   19   22
## [2,]   14   17   20   23
## [3,]   15   18   21   24

array(1:12, c(2, 3, 2))

## , , 1
## 
##      [,1] [,2] [,3]
## [1,]    1    3    5
## [2,]    2    4    6
## 
## , , 2
## 
##      [,1] [,2] [,3]
## [1,]    7    9   11
## [2,]    8   10   12

ary <- array(1:12, c(2, 3, 2))
ary

## , , 1
## 
##      [,1] [,2] [,3]
## [1,]    1    3    5
## [2,]    2    4    6
## 
## , , 2
## 
##      [,1] [,2] [,3]
## [1,]    7    9   11
## [2,]    8   10   12

ary[1, 3, 2]

## [1] 11

ary[, 1, 2]

## [1] 7 8

ary[, 1, 2, drop = FALSE]

## , , 1
## 
##      [,1]
## [1,]    7
## [2,]    8

ary[2, ,]

##      [,1] [,2]
## [1,]    2    8
## [2,]    4   10
## [3,]    6   12

리스트

list(), names(), length(), unlist()

list(0.6826, 0.9544, 0.9974)

## [[1]]
## [1] 0.6826
## 
## [[2]]
## [1] 0.9544
## 
## [[3]]
## [1] 0.9974

list(1.23,
     "Apple",
     c(2, 3, 5, 7),
     matrix(1:6, ncol = 3),
     mean)

## [[1]]
## [1] 1.23
## 
## [[2]]
## [1] "Apple"
## 
## [[3]]
## [1] 2 3 5 7
## 
## [[4]]
##      [,1] [,2] [,3]
## [1,]    1    3    5
## [2,]    2    4    6
## 
## [[5]]
## function (x, ...) 
## UseMethod("mean")
## <bytecode: 0x0000000017ebee00>
## <environment: namespace:base>

lst <- list()
lst

## list()

lst[[1]] <- 1.23
lst[[2]] <- "Apple"
lst[[3]] <- c(2, 3, 5, 7)
lst[[4]] <- matrix(1:6, ncol = 3)
lst[[5]] <- mean
lst

## [[1]]
## [1] 1.23
## 
## [[2]]
## [1] "Apple"
## 
## [[3]]
## [1] 2 3 5 7
## 
## [[4]]
##      [,1] [,2] [,3]
## [1,]    1    3    5
## [2,]    2    4    6
## 
## [[5]]
## function (x, ...) 
## UseMethod("mean")
## <bytecode: 0x0000000017ebee00>
## <environment: namespace:base>

lst <- list(0.6826, 0.9544, 0.9974)
lst

## [[1]]
## [1] 0.6826
## 
## [[2]]
## [1] 0.9544
## 
## [[3]]
## [1] 0.9974

names(lst) <- c("sigma1", "sigma2", "sigma3")
lst

## $sigma1
## [1] 0.6826
## 
## $sigma2
## [1] 0.9544
## 
## $sigma3
## [1] 0.9974

names(lst)

## [1] "sigma1" "sigma2" "sigma3"

length(lst)

## [1] 3

worldcup1 <- list("Brazil", "Sourth Africa", "Germany")
worldcup2 <- list("Korea-Japan", "France", "USA")
c(worldcup1, worldcup2)

## [[1]]
## [1] "Brazil"
## 
## [[2]]
## [1] "Sourth Africa"
## 
## [[3]]
## [1] "Germany"
## 
## [[4]]
## [1] "Korea-Japan"
## 
## [[5]]
## [1] "France"
## 
## [[6]]
## [1] "USA"

a <- list(1, 2, 3, 4, 5, 6, 7)
mean(a)

## Warning in mean.default(a): argument is not numeric or logical: returning NA

## [1] NA

mean(unlist(a))

## [1] 4

min(unlist(a))

## [1] 1

max(unlist(a))

## [1] 7

인덱싱

product <- list("A002", "Mouse", 30000)
product

## [[1]]
## [1] "A002"
## 
## [[2]]
## [1] "Mouse"
## 
## [[3]]
## [1] 30000

product[[3]]

## [1] 30000

product[[2]]

## [1] "Mouse"

product[3]

## [[1]]
## [1] 30000

class(product[[3]])

## [1] "numeric"

class(product[3])

## [1] "list"

# product[3] * 0.9 # Error
product[[3]] * 0.9

## [1] 27000

product[c(1, 2)]

## [[1]]
## [1] "A002"
## 
## [[2]]
## [1] "Mouse"

product[c(FALSE, TRUE, TRUE)]

## [[1]]
## [1] "Mouse"
## 
## [[2]]
## [1] 30000

product[-1]

## [[1]]
## [1] "Mouse"
## 
## [[2]]
## [1] 30000

product <- list(id = "A002",
                name = "Mouse",
                price = 30000)
product

## $id
## [1] "A002"
## 
## $name
## [1] "Mouse"
## 
## $price
## [1] 30000

product[["name"]]

## [1] "Mouse"

product$name

## [1] "Mouse"

product[c("name", "price")]

## $name
## [1] "Mouse"
## 
## $price
## [1] 30000

product[["fourth"]]

## NULL

product$fourth

## NULL

# product[[4]] # Error

product[c(4, 2, 5)]

## $<NA>
## NULL
## 
## $name
## [1] "Mouse"
## 
## $<NA>
## NULL

product[c("fourth", "name", "fifth")]

## $<NA>
## NULL
## 
## $name
## [1] "Mouse"
## 
## $<NA>
## NULL

lst <- list(one = 1,
            two = 2,
            three = list(alpha = 3.1, beta = 3.3))
lst

## $one
## [1] 1
## 
## $two
## [1] 2
## 
## $three
## $three$alpha
## [1] 3.1
## 
## $three$beta
## [1] 3.3

lst[["three"]]

## $alpha
## [1] 3.1
## 
## $beta
## [1] 3.3

lst[["three"]][["beta"]]

## [1] 3.3

lst$three$beta

## [1] 3.3

product <- list(id = "A001",
                name = "Mouse",
                price = 30000)
product

## $id
## [1] "A001"
## 
## $name
## [1] "Mouse"
## 
## $price
## [1] 30000

product[[3]] <- 40000
product

## $id
## [1] "A001"
## 
## $name
## [1] "Mouse"
## 
## $price
## [1] 40000

product[["price"]] <- 40000
product

## $id
## [1] "A001"
## 
## $name
## [1] "Mouse"
## 
## $price
## [1] 40000

product$price <- 40000
product

## $id
## [1] "A001"
## 
## $name
## [1] "Mouse"
## 
## $price
## [1] 40000

product[3] <- 40000
product["price"] <- 40000
product

## $id
## [1] "A001"
## 
## $name
## [1] "Mouse"
## 
## $price
## [1] 40000

product[[3]] <- c(30000, 40000)
product

## $id
## [1] "A001"
## 
## $name
## [1] "Mouse"
## 
## $price
## [1] 30000 40000

product[3] <- list(c(30000, 40000))
product

## $id
## [1] "A001"
## 
## $name
## [1] "Mouse"
## 
## $price
## [1] 30000 40000

product[1:3] <- list("A002", "Keyboard", 90000)
product

## $id
## [1] "A002"
## 
## $name
## [1] "Keyboard"
## 
## $price
## [1] 90000

product[[4]] <- c("Domestic", "Export")
product

## $id
## [1] "A002"
## 
## $name
## [1] "Keyboard"
## 
## $price
## [1] 90000
## 
## [[4]]
## [1] "Domestic" "Export"

product$madein <- c("Korea", "China")
product

## $id
## [1] "A002"
## 
## $name
## [1] "Keyboard"
## 
## $price
## [1] 90000
## 
## [[4]]
## [1] "Domestic" "Export"  
## 
## $madein
## [1] "Korea" "China"

product[["madein"]] <- c("Korea", "China")
product

## $id
## [1] "A002"
## 
## $name
## [1] "Keyboard"
## 
## $price
## [1] 90000
## 
## [[4]]
## [1] "Domestic" "Export"  
## 
## $madein
## [1] "Korea" "China"

product["madein"] <- list(c("Korea", "China"))
product

## $id
## [1] "A002"
## 
## $name
## [1] "Keyboard"
## 
## $price
## [1] 90000
## 
## [[4]]
## [1] "Domestic" "Export"  
## 
## $madein
## [1] "Korea" "China"

product[6:9] <- list(0.12, 0.15, 0.22, 0.27)
product

## $id
## [1] "A002"
## 
## $name
## [1] "Keyboard"
## 
## $price
## [1] 90000
## 
## [[4]]
## [1] "Domestic" "Export"  
## 
## $madein
## [1] "Korea" "China"
## 
## [[6]]
## [1] 0.12
## 
## [[7]]
## [1] 0.15
## 
## [[8]]
## [1] 0.22
## 
## [[9]]
## [1] 0.27

names <- c("Mon", "Tue", "Wed", "Thur", "Fri", "Sat", "Sun")
values <- c(842, 729, 786, 751, 844, 851, 702)

traffic.death <- list()
traffic.death

## list()

traffic.death[names] <- values
traffic.death

## $Mon
## [1] 842
## 
## $Tue
## [1] 729
## 
## $Wed
## [1] 786
## 
## $Thur
## [1] 751
## 
## $Fri
## [1] 844
## 
## $Sat
## [1] 851
## 
## $Sun
## [1] 702

traffic.death[["Fri"]] <- NULL
traffic.death

## $Mon
## [1] 842
## 
## $Tue
## [1] 729
## 
## $Wed
## [1] 786
## 
## $Thur
## [1] 751
## 
## $Sat
## [1] 851
## 
## $Sun
## [1] 702

traffic.death[c("Sat", "Sun")] <- NULL
traffic.death

## $Mon
## [1] 842
## 
## $Tue
## [1] 729
## 
## $Wed
## [1] 786
## 
## $Thur
## [1] 751

traffic.death < 750

##   Mon   Tue   Wed  Thur 
## FALSE  TRUE FALSE FALSE

traffic.death[traffic.death < 750] <- NULL
traffic.death

## $Mon
## [1] 842
## 
## $Wed
## [1] 786
## 
## $Thur
## [1] 751

데이터프레임

data.frame()

v1 <- c("A001", "A002", "A003")
v2 <- c("Mouse", "Keyboard", "USB")
v3 <- c(30000, 90000, 50000)
data.frame(v1, v2, v3)

##     v1       v2    v3
## 1 A001    Mouse 30000
## 2 A002 Keyboard 90000
## 3 A003      USB 50000

data.frame(row.names = v1, v2, v3)

##            v2    v3
## A001    Mouse 30000
## A002 Keyboard 90000
## A003      USB 50000

product <- data.frame(id = v1, name = v2,
                      price = v3)
str(product)

## 'data.frame':    3 obs. of  3 variables:
##  $ id   : chr  "A001" "A002" "A003"
##  $ name : chr  "Mouse" "Keyboard" "USB"
##  $ price: num  30000 90000 50000

product <- data.frame(id = v1, name = v2,
                      price = v3, stringsAsFactors = TRUE)
str(product)

## 'data.frame':    3 obs. of  3 variables:
##  $ id   : Factor w/ 3 levels "A001","A002",..: 1 2 3
##  $ name : Factor w/ 3 levels "Keyboard","Mouse",..: 2 1 3
##  $ price: num  30000 90000 50000

mat <- matrix(c(1, 3, 5, 7, 9,
                2, 4, 6, 8, 10,
                2, 3, 5, 7, 11), ncol = 3)
number <- as.data.frame(mat)
colnames(number) <- c("odd", "even", "prime")
number

##   odd even prime
## 1   1    2     2
## 2   3    4     3
## 3   5    6     5
## 4   7    8     7
## 5   9   10    11

v1 <- c("A001", "A002", "A003")
v2 <- c("Mouse", "Keyboard", "USB")
v3 <- c(30000, 90000, 50000)
lst <- list(v1, v2, v3)
product <- as.data.frame(lst)
colnames(product) <- c("odd", "even", "prime")
product

##    odd     even prime
## 1 A001    Mouse 30000
## 2 A002 Keyboard 90000
## 3 A003      USB 50000

nrow(product)

## [1] 3

ncol(product)

## [1] 3

length(product)

## [1] 3

rbind(), cbind()

v1 <- c("A001", "A002", "A003")
v2 <- c("Mouse", "Keyboard", "USB")
v3 <- c(30000, 90000, 50000)
product <- data.frame(id = v1, name = v2,
                      price = v3)
product

##     id     name price
## 1 A001    Mouse 30000
## 2 A002 Keyboard 90000
## 3 A003      USB 50000

product <- rbind(product,
      c("A004", "Monitor", 250000))
product

##     id     name  price
## 1 A001    Mouse  30000
## 2 A002 Keyboard  90000
## 3 A003      USB  50000
## 4 A004  Monitor 250000

new.rows <- data.frame(id = c("A005", "A006"),
                       name = c("Memory", "CPU"),
                       price = c(35000, 320000))
product <- rbind(product, new.rows)
product

##     id     name  price
## 1 A001    Mouse  30000
## 2 A002 Keyboard  90000
## 3 A003      USB  50000
## 4 A004  Monitor 250000
## 5 A005   Memory  35000
## 6 A006      CPU 320000

product <- cbind(product,
                 madein = c("Korea", "China", "China", "Korea",
                            "Korea", "USA"))
product

##     id     name  price madein
## 1 A001    Mouse  30000  Korea
## 2 A002 Keyboard  90000  China
## 3 A003      USB  50000  China
## 4 A004  Monitor 250000  Korea
## 5 A005   Memory  35000  Korea
## 6 A006      CPU 320000    USA

product$madein = c("Korea", "China", "China", "Korea",
                            "Korea", "USA")
product

##     id     name  price madein
## 1 A001    Mouse  30000  Korea
## 2 A002 Keyboard  90000  China
## 3 A003      USB  50000  China
## 4 A004  Monitor 250000  Korea
## 5 A005   Memory  35000  Korea
## 6 A006      CPU 320000    USA

product$madeina = c("Korea", "China", "China", "Korea",
                            "Korea", "USA")
product

##     id     name  price madein madeina
## 1 A001    Mouse  30000  Korea   Korea
## 2 A002 Keyboard  90000  China   China
## 3 A003      USB  50000  China   China
## 4 A004  Monitor 250000  Korea   Korea
## 5 A005   Memory  35000  Korea   Korea
## 6 A006      CPU 320000    USA     USA

# Two extra columns prepared as their own six-row data frame.
new.cols <- data.frame(
  manufacturer = rep(c("Logitech", "Samsung", "Intel"), times = c(2, 3, 1)),
  quantity = c(20, 15, 50, 30, 40, 10)
)
new.cols

##   manufacturer quantity
## 1     Logitech       20
## 2     Logitech       15
## 3      Samsung       50
## 4      Samsung       30
## 5      Samsung       40
## 6        Intel       10

# Bind every column of new.cols onto product in one call; both data frames
# have six rows, so the rows line up by position.
product <- cbind(product, new.cols)
product

##     id     name  price madein madeina manufacturer quantity
## 1 A001    Mouse  30000  Korea   Korea     Logitech       20
## 2 A002 Keyboard  90000  China   China     Logitech       15
## 3 A003      USB  50000  China   China      Samsung       50
## 4 A004  Monitor 250000  Korea   Korea      Samsung       30
## 5 A005   Memory  35000  Korea   Korea      Samsung       40
## 6 A006      CPU 320000    USA     USA        Intel       10

# Two data frames that share the column names x and y.
cols1 <- data.frame(
  x = c("a", "b", "c"),
  y = c(1, 2, 3)
)
cols2 <- data.frame(
  x = c("alpha", "beta", "gamma"),
  y = c(100, 200, 300)
)
# cbind() keeps both pairs of columns, so the result has duplicated names.
cbind(cols1, cols2)

##   x y     x   y
## 1 a 1 alpha 100
## 2 b 2  beta 200
## 3 c 3 gamma 300

do.call()

# Five one-row data frames with the same three columns.
df1 <- data.frame(
  sex = "female",
  months = 1,
  weight = 3.5
)
df2 <- data.frame(
  sex = "male",
  months = 3,
  weight = 4.8
)
df3 <- data.frame(
  sex = "male",
  months = 4,
  weight = 5.3
)
df4 <- data.frame(
  sex = "female",
  months = 9,
  weight = 9.4
)
df5 <- data.frame(
  sex = "female",
  months = 7,
  weight = 8.3
)
# Collect them in an unnamed list, in order.
lst <- list(df1, df2, df3, df4, df5)

# [[ ]] pulls out a single list element: here the first one-row data frame.
lst[[1]]

##      sex months weight
## 1 female      1    3.5

# The second element, another one-row data frame with the same columns.
lst[[2]]

##    sex months weight
## 1 male      3    4.8

# rbind() stacks the two data frames row-wise.
rbind(lst[[1]], lst[[2]])

##      sex months weight
## 1 female      1    3.5
## 2   male      3    4.8

# do.call() passes every element of lst to rbind as a separate argument,
# i.e. rbind(lst[[1]], lst[[2]], ..., lst[[5]]).
do.call(rbind, lst)

##      sex months weight
## 1 female      1    3.5
## 2   male      3    4.8
## 3   male      4    5.3
## 4 female      9    9.4
## 5 female      7    8.3

# The same five records, this time as plain named lists instead of data frames.
lst1 <- list(
  sex = "female",
  months = 1,
  weight = 3.5
)
lst2 <- list(
  sex = "male",
  months = 3,
  weight = 4.8
)
lst3 <- list(
  sex = "male",
  months = 4,
  weight = 5.3
)
lst4 <- list(
  sex = "female",
  months = 9,
  weight = 9.4
)
lst5 <- list(
  sex = "female",
  months = 7,
  weight = 8.3
)
# lst is reassigned: it now holds the five named lists.
lst <- list(lst1, lst2, lst3, lst4, lst5)

# Each element of lst is now a plain named list, not a data frame.
lst[[1]]

## $sex
## [1] "female"
## 
## $months
## [1] 1
## 
## $weight
## [1] 3.5

# Convert one named list into a one-row data frame.
as.data.frame(lst[[1]])

##      sex months weight
## 1 female      1    3.5

# Apply the conversion to every element; the result is a list of data frames.
lapply(lst, as.data.frame)

## [[1]]
##      sex months weight
## 1 female      1    3.5
## 
## [[2]]
##    sex months weight
## 1 male      3    4.8
## 
## [[3]]
##    sex months weight
## 1 male      4    5.3
## 
## [[4]]
##      sex months weight
## 1 female      9    9.4
## 
## [[5]]
##      sex months weight
## 1 female      7    8.3

# Convert each named list to a data frame, then stack them all with rbind.
do.call(rbind, lapply(lst, as.data.frame))

##      sex months weight
## 1 female      1    3.5
## 2   male      3    4.8
## 3   male      4    5.3
## 4 female      9    9.4
## 5 female      7    8.3

인덱싱

# Assemble a data frame from four built-in state vectors; each column is
# named explicitly after the vector it comes from.
us.state <- data.frame(
  state.abb = state.abb,
  state.name = state.name,
  state.region = state.region,
  state.area = state.area,
  stringsAsFactors = FALSE
)
us.state

##    state.abb     state.name  state.region state.area
## 1         AL        Alabama         South      51609
## 2         AK         Alaska          West     589757
## 3         AZ        Arizona          West     113909
## 4         AR       Arkansas         South      53104
## 5         CA     California          West     158693
## 6         CO       Colorado          West     104247
## 7         CT    Connecticut     Northeast       5009
## 8         DE       Delaware         South       2057
## 9         FL        Florida         South      58560
## 10        GA        Georgia         South      58876
## 11        HI         Hawaii          West       6450
## 12        ID          Idaho          West      83557
## 13        IL       Illinois North Central      56400
## 14        IN        Indiana North Central      36291
## 15        IA           Iowa North Central      56290
## 16        KS         Kansas North Central      82264
## 17        KY       Kentucky         South      40395
## 18        LA      Louisiana         South      48523
## 19        ME          Maine     Northeast      33215
## 20        MD       Maryland         South      10577
## 21        MA  Massachusetts     Northeast       8257
## 22        MI       Michigan North Central      58216
## 23        MN      Minnesota North Central      84068
## 24        MS    Mississippi         South      47716
## 25        MO       Missouri North Central      69686
## 26        MT        Montana          West     147138
## 27        NE       Nebraska North Central      77227
## 28        NV         Nevada          West     110540
## 29        NH  New Hampshire     Northeast       9304
## 30        NJ     New Jersey     Northeast       7836
## 31        NM     New Mexico          West     121666
## 32        NY       New York     Northeast      49576
## 33        NC North Carolina         South      52586
## 34        ND   North Dakota North Central      70665
## 35        OH           Ohio North Central      41222
## 36        OK       Oklahoma         South      69919
## 37        OR         Oregon          West      96981
## 38        PA   Pennsylvania     Northeast      45333
## 39        RI   Rhode Island     Northeast       1214
## 40        SC South Carolina         South      31055
## 41        SD   South Dakota North Central      77047
## 42        TN      Tennessee         South      42244
## 43        TX          Texas         South     267339
## 44        UT           Utah          West      84916
## 45        VT        Vermont     Northeast       9609
## 46        VA       Virginia         South      40815
## 47        WA     Washington          West      68192
## 48        WV  West Virginia         South      24181
## 49        WI      Wisconsin North Central      56154
## 50        WY        Wyoming          West      97914

# Show the structure: number of rows, column names, types and a value preview.
str(us.state)

## 'data.frame':    50 obs. of  4 variables:
##  $ state.abb   : chr  "AL" "AK" "AZ" "AR" ...
##  $ state.name  : chr  "Alabama" "Alaska" "Arizona" "Arkansas" ...
##  $ state.region: Factor w/ 4 levels "Northeast","South",..: 2 4 4 2 4 4 1 2 2 2 ...
##  $ state.area  : num  51609 589757 113909 53104 158693 ...

# [[ ]] with a position extracts the second column itself, as a vector.
us.state[[2]]

##  [1] "Alabama"        "Alaska"         "Arizona"        "Arkansas"      
##  [5] "California"     "Colorado"       "Connecticut"    "Delaware"      
##  [9] "Florida"        "Georgia"        "Hawaii"         "Idaho"         
## [13] "Illinois"       "Indiana"        "Iowa"           "Kansas"        
## [17] "Kentucky"       "Louisiana"      "Maine"          "Maryland"      
## [21] "Massachusetts"  "Michigan"       "Minnesota"      "Mississippi"   
## [25] "Missouri"       "Montana"        "Nebraska"       "Nevada"        
## [29] "New Hampshire"  "New Jersey"     "New Mexico"     "New York"      
## [33] "North Carolina" "North Dakota"   "Ohio"           "Oklahoma"      
## [37] "Oregon"         "Pennsylvania"   "Rhode Island"   "South Carolina"
## [41] "South Dakota"   "Tennessee"      "Texas"          "Utah"          
## [45] "Vermont"        "Virginia"       "Washington"     "West Virginia" 
## [49] "Wisconsin"      "Wyoming"

# Confirm that the extracted column is a plain character vector.
str(us.state[[2]])

##  chr [1:50] "Alabama" "Alaska" "Arizona" "Arkansas" "California" "Colorado" ...

# Single [ ] with a position keeps the data frame container (one column).
us.state[2]

##        state.name
## 1         Alabama
## 2          Alaska
## 3         Arizona
## 4        Arkansas
## 5      California
## 6        Colorado
## 7     Connecticut
## 8        Delaware
## 9         Florida
## 10        Georgia
## 11         Hawaii
## 12          Idaho
## 13       Illinois
## 14        Indiana
## 15           Iowa
## 16         Kansas
## 17       Kentucky
## 18      Louisiana
## 19          Maine
## 20       Maryland
## 21  Massachusetts
## 22       Michigan
## 23      Minnesota
## 24    Mississippi
## 25       Missouri
## 26        Montana
## 27       Nebraska
## 28         Nevada
## 29  New Hampshire
## 30     New Jersey
## 31     New Mexico
## 32       New York
## 33 North Carolina
## 34   North Dakota
## 35           Ohio
## 36       Oklahoma
## 37         Oregon
## 38   Pennsylvania
## 39   Rhode Island
## 40 South Carolina
## 41   South Dakota
## 42      Tennessee
## 43          Texas
## 44           Utah
## 45        Vermont
## 46       Virginia
## 47     Washington
## 48  West Virginia
## 49      Wisconsin
## 50        Wyoming

# Confirm that the result is still a data frame, now with one variable.
str(us.state[2])

## 'data.frame':    50 obs. of  1 variable:
##  $ state.name: chr  "Alabama" "Alaska" "Arizona" "Arkansas" ...

# A vector of positions inside single [ ] selects several columns.
us.state[c(2, 4)] # list-style indexing

##        state.name state.area
## 1         Alabama      51609
## 2          Alaska     589757
## 3         Arizona     113909
## 4        Arkansas      53104
## 5      California     158693
## 6        Colorado     104247
## 7     Connecticut       5009
## 8        Delaware       2057
## 9         Florida      58560
## 10        Georgia      58876
## 11         Hawaii       6450
## 12          Idaho      83557
## 13       Illinois      56400
## 14        Indiana      36291
## 15           Iowa      56290
## 16         Kansas      82264
## 17       Kentucky      40395
## 18      Louisiana      48523
## 19          Maine      33215
## 20       Maryland      10577
## 21  Massachusetts       8257
## 22       Michigan      58216
## 23      Minnesota      84068
## 24    Mississippi      47716
## 25       Missouri      69686
## 26        Montana     147138
## 27       Nebraska      77227
## 28         Nevada     110540
## 29  New Hampshire       9304
## 30     New Jersey       7836
## 31     New Mexico     121666
## 32       New York      49576
## 33 North Carolina      52586
## 34   North Dakota      70665
## 35           Ohio      41222
## 36       Oklahoma      69919
## 37         Oregon      96981
## 38   Pennsylvania      45333
## 39   Rhode Island       1214
## 40 South Carolina      31055
## 41   South Dakota      77047
## 42      Tennessee      42244
## 43          Texas     267339
## 44           Utah      84916
## 45        Vermont       9609
## 46       Virginia      40815
## 47     Washington      68192
## 48  West Virginia      24181
## 49      Wisconsin      56154
## 50        Wyoming      97914

# With a single column, [row, col] indexing returns a plain vector by default.
us.state[, 2] # matrix-style indexing

##  [1] "Alabama"        "Alaska"         "Arizona"        "Arkansas"      
##  [5] "California"     "Colorado"       "Connecticut"    "Delaware"      
##  [9] "Florida"        "Georgia"        "Hawaii"         "Idaho"         
## [13] "Illinois"       "Indiana"        "Iowa"           "Kansas"        
## [17] "Kentucky"       "Louisiana"      "Maine"          "Maryland"      
## [21] "Massachusetts"  "Michigan"       "Minnesota"      "Mississippi"   
## [25] "Missouri"       "Montana"        "Nebraska"       "Nevada"        
## [29] "New Hampshire"  "New Jersey"     "New Mexico"     "New York"      
## [33] "North Carolina" "North Dakota"   "Ohio"           "Oklahoma"      
## [37] "Oregon"         "Pennsylvania"   "Rhode Island"   "South Carolina"
## [41] "South Dakota"   "Tennessee"      "Texas"          "Utah"          
## [45] "Vermont"        "Virginia"       "Washington"     "West Virginia" 
## [49] "Wisconsin"      "Wyoming"

# drop = FALSE keeps the one-column result as a data frame instead of a vector.
us.state[, 2, drop = FALSE]

##        state.name
## 1         Alabama
## 2          Alaska
## 3         Arizona
## 4        Arkansas
## 5      California
## 6        Colorado
## 7     Connecticut
## 8        Delaware
## 9         Florida
## 10        Georgia
## 11         Hawaii
## 12          Idaho
## 13       Illinois
## 14        Indiana
## 15           Iowa
## 16         Kansas
## 17       Kentucky
## 18      Louisiana
## 19          Maine
## 20       Maryland
## 21  Massachusetts
## 22       Michigan
## 23      Minnesota
## 24    Mississippi
## 25       Missouri
## 26        Montana
## 27       Nebraska
## 28         Nevada
## 29  New Hampshire
## 30     New Jersey
## 31     New Mexico
## 32       New York
## 33 North Carolina
## 34   North Dakota
## 35           Ohio
## 36       Oklahoma
## 37         Oregon
## 38   Pennsylvania
## 39   Rhode Island
## 40 South Carolina
## 41   South Dakota
## 42      Tennessee
## 43          Texas
## 44           Utah
## 45        Vermont
## 46       Virginia
## 47     Washington
## 48  West Virginia
## 49      Wisconsin
## 50        Wyoming

# Matrix-style indexing with several columns returns a data frame.
us.state[, c(2, 4)]

##        state.name state.area
## 1         Alabama      51609
## 2          Alaska     589757
## 3         Arizona     113909
## 4        Arkansas      53104
## 5      California     158693
## 6        Colorado     104247
## 7     Connecticut       5009
## 8        Delaware       2057
## 9         Florida      58560
## 10        Georgia      58876
## 11         Hawaii       6450
## 12          Idaho      83557
## 13       Illinois      56400
## 14        Indiana      36291
## 15           Iowa      56290
## 16         Kansas      82264
## 17       Kentucky      40395
## 18      Louisiana      48523
## 19          Maine      33215
## 20       Maryland      10577
## 21  Massachusetts       8257
## 22       Michigan      58216
## 23      Minnesota      84068
## 24    Mississippi      47716
## 25       Missouri      69686
## 26        Montana     147138
## 27       Nebraska      77227
## 28         Nevada     110540
## 29  New Hampshire       9304
## 30     New Jersey       7836
## 31     New Mexico     121666
## 32       New York      49576
## 33 North Carolina      52586
## 34   North Dakota      70665
## 35           Ohio      41222
## 36       Oklahoma      69919
## 37         Oregon      96981
## 38   Pennsylvania      45333
## 39   Rhode Island       1214
## 40 South Carolina      31055
## 41   South Dakota      77047
## 42      Tennessee      42244
## 43          Texas     267339
## 44           Utah      84916
## 45        Vermont       9609
## 46       Virginia      40815
## 47     Washington      68192
## 48  West Virginia      24181
## 49      Wisconsin      56154
## 50        Wyoming      97914

# [[ ]] also accepts a column name; the column comes back as a vector.
us.state[["state.name"]]

##  [1] "Alabama"        "Alaska"         "Arizona"        "Arkansas"      
##  [5] "California"     "Colorado"       "Connecticut"    "Delaware"      
##  [9] "Florida"        "Georgia"        "Hawaii"         "Idaho"         
## [13] "Illinois"       "Indiana"        "Iowa"           "Kansas"        
## [17] "Kentucky"       "Louisiana"      "Maine"          "Maryland"      
## [21] "Massachusetts"  "Michigan"       "Minnesota"      "Mississippi"   
## [25] "Missouri"       "Montana"        "Nebraska"       "Nevada"        
## [29] "New Hampshire"  "New Jersey"     "New Mexico"     "New York"      
## [33] "North Carolina" "North Dakota"   "Ohio"           "Oklahoma"      
## [37] "Oregon"         "Pennsylvania"   "Rhode Island"   "South Carolina"
## [41] "South Dakota"   "Tennessee"      "Texas"          "Utah"          
## [45] "Vermont"        "Virginia"       "Washington"     "West Virginia" 
## [49] "Wisconsin"      "Wyoming"

# `$` extracts the named column as a vector.
us.state$state.name

##  [1] "Alabama"        "Alaska"         "Arizona"        "Arkansas"      
##  [5] "California"     "Colorado"       "Connecticut"    "Delaware"      
##  [9] "Florida"        "Georgia"        "Hawaii"         "Idaho"         
## [13] "Illinois"       "Indiana"        "Iowa"           "Kansas"        
## [17] "Kentucky"       "Louisiana"      "Maine"          "Maryland"      
## [21] "Massachusetts"  "Michigan"       "Minnesota"      "Mississippi"   
## [25] "Missouri"       "Montana"        "Nebraska"       "Nevada"        
## [29] "New Hampshire"  "New Jersey"     "New Mexico"     "New York"      
## [33] "North Carolina" "North Dakota"   "Ohio"           "Oklahoma"      
## [37] "Oregon"         "Pennsylvania"   "Rhode Island"   "South Carolina"
## [41] "South Dakota"   "Tennessee"      "Texas"          "Utah"          
## [45] "Vermont"        "Virginia"       "Washington"     "West Virginia" 
## [49] "Wisconsin"      "Wyoming"

# Matrix-style indexing by column name; a single column is returned as a vector.
us.state[, "state.name"]

##  [1] "Alabama"        "Alaska"         "Arizona"        "Arkansas"      
##  [5] "California"     "Colorado"       "Connecticut"    "Delaware"      
##  [9] "Florida"        "Georgia"        "Hawaii"         "Idaho"         
## [13] "Illinois"       "Indiana"        "Iowa"           "Kansas"        
## [17] "Kentucky"       "Louisiana"      "Maine"          "Maryland"      
## [21] "Massachusetts"  "Michigan"       "Minnesota"      "Mississippi"   
## [25] "Missouri"       "Montana"        "Nebraska"       "Nevada"        
## [29] "New Hampshire"  "New Jersey"     "New Mexico"     "New York"      
## [33] "North Carolina" "North Dakota"   "Ohio"           "Oklahoma"      
## [37] "Oregon"         "Pennsylvania"   "Rhode Island"   "South Carolina"
## [41] "South Dakota"   "Tennessee"      "Texas"          "Utah"          
## [45] "Vermont"        "Virginia"       "Washington"     "West Virginia" 
## [49] "Wisconsin"      "Wyoming"

# List-style indexing with a vector of column names returns a data frame.
us.state[c("state.name", "state.area")]

##        state.name state.area
## 1         Alabama      51609
## 2          Alaska     589757
## 3         Arizona     113909
## 4        Arkansas      53104
## 5      California     158693
## 6        Colorado     104247
## 7     Connecticut       5009
## 8        Delaware       2057
## 9         Florida      58560
## 10        Georgia      58876
## 11         Hawaii       6450
## 12          Idaho      83557
## 13       Illinois      56400
## 14        Indiana      36291
## 15           Iowa      56290
## 16         Kansas      82264
## 17       Kentucky      40395
## 18      Louisiana      48523
## 19          Maine      33215
## 20       Maryland      10577
## 21  Massachusetts       8257
## 22       Michigan      58216
## 23      Minnesota      84068
## 24    Mississippi      47716
## 25       Missouri      69686
## 26        Montana     147138
## 27       Nebraska      77227
## 28         Nevada     110540
## 29  New Hampshire       9304
## 30     New Jersey       7836
## 31     New Mexico     121666
## 32       New York      49576
## 33 North Carolina      52586
## 34   North Dakota      70665
## 35           Ohio      41222
## 36       Oklahoma      69919
## 37         Oregon      96981
## 38   Pennsylvania      45333
## 39   Rhode Island       1214
## 40 South Carolina      31055
## 41   South Dakota      77047
## 42      Tennessee      42244
## 43          Texas     267339
## 44           Utah      84916
## 45        Vermont       9609
## 46       Virginia      40815
## 47     Washington      68192
## 48  West Virginia      24181
## 49      Wisconsin      56154
## 50        Wyoming      97914

# Matrix-style indexing with a vector of column names returns a data frame.
us.state[, c("state.name", "state.area")]

##        state.name state.area
## 1         Alabama      51609
## 2          Alaska     589757
## 3         Arizona     113909
## 4        Arkansas      53104
## 5      California     158693
## 6        Colorado     104247
## 7     Connecticut       5009
## 8        Delaware       2057
## 9         Florida      58560
## 10        Georgia      58876
## 11         Hawaii       6450
## 12          Idaho      83557
## 13       Illinois      56400
## 14        Indiana      36291
## 15           Iowa      56290
## 16         Kansas      82264
## 17       Kentucky      40395
## 18      Louisiana      48523
## 19          Maine      33215
## 20       Maryland      10577
## 21  Massachusetts       8257
## 22       Michigan      58216
## 23      Minnesota      84068
## 24    Mississippi      47716
## 25       Missouri      69686
## 26        Montana     147138
## 27       Nebraska      77227
## 28         Nevada     110540
## 29  New Hampshire       9304
## 30     New Jersey       7836
## 31     New Mexico     121666
## 32       New York      49576
## 33 North Carolina      52586
## 34   North Dakota      70665
## 35           Ohio      41222
## 36       Oklahoma      69919
## 37         Oregon      96981
## 38   Pennsylvania      45333
## 39   Rhode Island       1214
## 40 South Carolina      31055
## 41   South Dakota      77047
## 42      Tennessee      42244
## 43          Texas     267339
## 44           Utah      84916
## 45        Vermont       9609
## 46       Virginia      40815
## 47     Washington      68192
## 48  West Virginia      24181
## 49      Wisconsin      56154
## 50        Wyoming      97914

# Built-in numeric matrix with one row per state and eight columns.
state.x77

##                Population Income Illiteracy Life Exp Murder HS Grad Frost
## Alabama              3615   3624        2.1    69.05   15.1    41.3    20
## Alaska                365   6315        1.5    69.31   11.3    66.7   152
## Arizona              2212   4530        1.8    70.55    7.8    58.1    15
## Arkansas             2110   3378        1.9    70.66   10.1    39.9    65
## California          21198   5114        1.1    71.71   10.3    62.6    20
## Colorado             2541   4884        0.7    72.06    6.8    63.9   166
## Connecticut          3100   5348        1.1    72.48    3.1    56.0   139
## Delaware              579   4809        0.9    70.06    6.2    54.6   103
## Florida              8277   4815        1.3    70.66   10.7    52.6    11
## Georgia              4931   4091        2.0    68.54   13.9    40.6    60
## Hawaii                868   4963        1.9    73.60    6.2    61.9     0
## Idaho                 813   4119        0.6    71.87    5.3    59.5   126
## Illinois            11197   5107        0.9    70.14   10.3    52.6   127
## Indiana              5313   4458        0.7    70.88    7.1    52.9   122
## Iowa                 2861   4628        0.5    72.56    2.3    59.0   140
## Kansas               2280   4669        0.6    72.58    4.5    59.9   114
## Kentucky             3387   3712        1.6    70.10   10.6    38.5    95
## Louisiana            3806   3545        2.8    68.76   13.2    42.2    12
## Maine                1058   3694        0.7    70.39    2.7    54.7   161
## Maryland             4122   5299        0.9    70.22    8.5    52.3   101
## Massachusetts        5814   4755        1.1    71.83    3.3    58.5   103
## Michigan             9111   4751        0.9    70.63   11.1    52.8   125
## Minnesota            3921   4675        0.6    72.96    2.3    57.6   160
## Mississippi          2341   3098        2.4    68.09   12.5    41.0    50
## Missouri             4767   4254        0.8    70.69    9.3    48.8   108
## Montana               746   4347        0.6    70.56    5.0    59.2   155
## Nebraska             1544   4508        0.6    72.60    2.9    59.3   139
## Nevada                590   5149        0.5    69.03   11.5    65.2   188
## New Hampshire         812   4281        0.7    71.23    3.3    57.6   174
## New Jersey           7333   5237        1.1    70.93    5.2    52.5   115
## New Mexico           1144   3601        2.2    70.32    9.7    55.2   120
## New York            18076   4903        1.4    70.55   10.9    52.7    82
## North Carolina       5441   3875        1.8    69.21   11.1    38.5    80
## North Dakota          637   5087        0.8    72.78    1.4    50.3   186
## Ohio                10735   4561        0.8    70.82    7.4    53.2   124
## Oklahoma             2715   3983        1.1    71.42    6.4    51.6    82
## Oregon               2284   4660        0.6    72.13    4.2    60.0    44
## Pennsylvania        11860   4449        1.0    70.43    6.1    50.2   126
## Rhode Island          931   4558        1.3    71.90    2.4    46.4   127
## South Carolina       2816   3635        2.3    67.96   11.6    37.8    65
## South Dakota          681   4167        0.5    72.08    1.7    53.3   172
## Tennessee            4173   3821        1.7    70.11   11.0    41.8    70
## Texas               12237   4188        2.2    70.90   12.2    47.4    35
## Utah                 1203   4022        0.6    72.90    4.5    67.3   137
## Vermont               472   3907        0.6    71.64    5.5    57.1   168
## Virginia             4981   4701        1.4    70.08    9.5    47.8    85
## Washington           3559   4864        0.6    71.72    4.3    63.5    32
## West Virginia        1799   3617        1.4    69.48    6.7    41.6   100
## Wisconsin            4589   4468        0.7    72.48    3.0    54.5   149
## Wyoming               376   4566        0.6    70.29    6.9    62.9   173
##                  Area
## Alabama         50708
## Alaska         566432
## Arizona        113417
## Arkansas        51945
## California     156361
## Colorado       103766
## Connecticut      4862
## Delaware         1982
## Florida         54090
## Georgia         58073
## Hawaii           6425
## Idaho           82677
## Illinois        55748
## Indiana         36097
## Iowa            55941
## Kansas          81787
## Kentucky        39650
## Louisiana       44930
## Maine           30920
## Maryland         9891
## Massachusetts    7826
## Michigan        56817
## Minnesota       79289
## Mississippi     47296
## Missouri        68995
## Montana        145587
## Nebraska        76483
## Nevada         109889
## New Hampshire    9027
## New Jersey       7521
## New Mexico     121412
## New York        47831
## North Carolina  48798
## North Dakota    69273
## Ohio            40975
## Oklahoma        68782
## Oregon          96184
## Pennsylvania    44966
## Rhode Island     1049
## South Carolina  30225
## South Dakota    75955
## Tennessee       41328
## Texas          262134
## Utah            82096
## Vermont          9267
## Virginia        39780
## Washington      66570
## West Virginia   24070
## Wisconsin       54464
## Wyoming         97203

# The structure shows a 50 x 8 numeric matrix whose dimnames hold the state
# names (rows) and the variable names (columns).
str(state.x77)

##  num [1:50, 1:8] 3615 365 2212 2110 21198 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : chr [1:50] "Alabama" "Alaska" "Arizona" "Arkansas" ...
##   ..$ : chr [1:8] "Population" "Income" "Illiteracy" "Life Exp" ...

# data.frame() turns the matrix into a data frame. With its default
# check.names = TRUE the column names are made syntactic, so "Life Exp"
# becomes Life.Exp and "HS Grad" becomes HS.Grad.
states <- data.frame(state.x77)
str(states)

## 'data.frame':    50 obs. of  8 variables:
##  $ Population: num  3615 365 2212 2110 21198 ...
##  $ Income    : num  3624 6315 4530 3378 5114 ...
##  $ Illiteracy: num  2.1 1.5 1.8 1.9 1.1 0.7 1.1 0.9 1.3 2 ...
##  $ Life.Exp  : num  69 69.3 70.5 70.7 71.7 ...
##  $ Murder    : num  15.1 11.3 7.8 10.1 10.3 6.8 3.1 6.2 10.7 13.9 ...
##  $ HS.Grad   : num  41.3 66.7 58.1 39.9 62.6 63.9 56 54.6 52.6 40.6 ...
##  $ Frost     : num  20 152 15 65 20 166 139 103 11 60 ...
##  $ Area      : num  50708 566432 113417 51945 156361 ...

# The state names are stored as row names, not as a regular column.
row.names(states)

##  [1] "Alabama"        "Alaska"         "Arizona"        "Arkansas"      
##  [5] "California"     "Colorado"       "Connecticut"    "Delaware"      
##  [9] "Florida"        "Georgia"        "Hawaii"         "Idaho"         
## [13] "Illinois"       "Indiana"        "Iowa"           "Kansas"        
## [17] "Kentucky"       "Louisiana"      "Maine"          "Maryland"      
## [21] "Massachusetts"  "Michigan"       "Minnesota"      "Mississippi"   
## [25] "Missouri"       "Montana"        "Nebraska"       "Nevada"        
## [29] "New Hampshire"  "New Jersey"     "New Mexico"     "New York"      
## [33] "North Carolina" "North Dakota"   "Ohio"           "Oklahoma"      
## [37] "Oregon"         "Pennsylvania"   "Rhode Island"   "South Carolina"
## [41] "South Dakota"   "Tennessee"      "Texas"          "Utah"          
## [45] "Vermont"        "Virginia"       "Washington"     "West Virginia" 
## [49] "Wisconsin"      "Wyoming"

# Copy the row names into a regular column so they can be selected and merged on.
states$Name <- row.names(states)
# Setting the row names to NULL resets them to the default 1, 2, 3, ... numbering.
row.names(states) <- NULL
head(states)

##   Population Income Illiteracy Life.Exp Murder HS.Grad Frost   Area       Name
## 1       3615   3624        2.1    69.05   15.1    41.3    20  50708    Alabama
## 2        365   6315        1.5    69.31   11.3    66.7   152 566432     Alaska
## 3       2212   4530        1.8    70.55    7.8    58.1    15 113417    Arizona
## 4       2110   3378        1.9    70.66   10.1    39.9    65  51945   Arkansas
## 5      21198   5114        1.1    71.71   10.3    62.6    20 156361 California
## 6       2541   4884        0.7    72.06    6.8    63.9   166 103766   Colorado

# Keep the rows whose Income exceeds 5000, and only the Name and Income columns.
rich.states <- states[states$Income > 5000, c("Name", "Income")]
rich.states

##            Name Income
## 2        Alaska   6315
## 5    California   5114
## 7   Connecticut   5348
## 13     Illinois   5107
## 20     Maryland   5299
## 28       Nevada   5149
## 30   New Jersey   5237
## 34 North Dakota   5087

# Keep the rows whose Area exceeds 100000, and only the Name and Area columns.
large.states <- states[states$Area > 100000, c("Name", "Area")]
large.states

##          Name   Area
## 2      Alaska 566432
## 3     Arizona 113417
## 5  California 156361
## 6    Colorado 103766
## 26    Montana 145587
## 28     Nevada 109889
## 31 New Mexico 121412
## 43      Texas 262134

# Inner join: keep only the states present in both data frames. The join key
# is given explicitly; without `by`, merge() joins on every column name the two
# inputs share, which would silently change the result if they ever gained
# another common column.
merge(rich.states, large.states, by = "Name")

##         Name Income   Area
## 1     Alaska   6315 566432
## 2 California   5114 156361
## 3     Nevada   5149 109889

# Full outer join on the explicit key Name: all = TRUE also keeps the states
# found in only one of the inputs and fills the missing side with NA.
merge(rich.states, large.states, by = "Name", all = TRUE)

##            Name Income   Area
## 1        Alaska   6315 566432
## 2       Arizona     NA 113417
## 3    California   5114 156361
## 4      Colorado     NA 103766
## 5   Connecticut   5348     NA
## 6      Illinois   5107     NA
## 7      Maryland   5299     NA
## 8       Montana     NA 145587
## 9        Nevada   5149 109889
## 10   New Jersey   5237     NA
## 11   New Mexico     NA 121412
## 12 North Dakota   5087     NA
## 13        Texas     NA 262134

with(), within()

# Preview the first six rows of the built-in iris data frame.
head(iris)

##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa

# Without with(), every column reference has to be prefixed with iris$.
r <- iris$Sepal.Length / iris$Sepal.Width
head(r)

## [1] 1.457143 1.633333 1.468750 1.483871 1.388889 1.384615

# with() evaluates the expression with the columns of iris available as
# variables, so the iris$ prefix is not needed. The value is only returned
# (and printed here), not stored.
with(iris, Sepal.Length / Sepal.Width)

##   [1] 1.457143 1.633333 1.468750 1.483871 1.388889 1.384615 1.352941 1.470588
##   [9] 1.517241 1.580645 1.459459 1.411765 1.600000 1.433333 1.450000 1.295455
##  [17] 1.384615 1.457143 1.500000 1.342105 1.588235 1.378378 1.277778 1.545455
##  [25] 1.411765 1.666667 1.470588 1.485714 1.529412 1.468750 1.548387 1.588235
##  [33] 1.268293 1.309524 1.580645 1.562500 1.571429 1.361111 1.466667 1.500000
##  [41] 1.428571 1.956522 1.375000 1.428571 1.342105 1.600000 1.342105 1.437500
##  [49] 1.432432 1.515152 2.187500 2.000000 2.225806 2.391304 2.321429 2.035714
##  [57] 1.909091 2.041667 2.275862 1.925926 2.500000 1.966667 2.727273 2.103448
##  [65] 1.931034 2.161290 1.866667 2.148148 2.818182 2.240000 1.843750 2.178571
##  [73] 2.520000 2.178571 2.206897 2.200000 2.428571 2.233333 2.068966 2.192308
##  [81] 2.291667 2.291667 2.148148 2.222222 1.800000 1.764706 2.161290 2.739130
##  [89] 1.866667 2.200000 2.115385 2.033333 2.230769 2.173913 2.074074 1.900000
##  [97] 1.965517 2.137931 2.040000 2.035714 1.909091 2.148148 2.366667 2.172414
## [105] 2.166667 2.533333 1.960000 2.517241 2.680000 2.000000 2.031250 2.370370
## [113] 2.266667 2.280000 2.071429 2.000000 2.166667 2.026316 2.961538 2.727273
## [121] 2.156250 2.000000 2.750000 2.333333 2.030303 2.250000 2.214286 2.033333
## [129] 2.285714 2.400000 2.642857 2.078947 2.285714 2.250000 2.346154 2.566667
## [137] 1.852941 2.064516 2.000000 2.225806 2.161290 2.225806 2.148148 2.125000
## [145] 2.030303 2.233333 2.520000 2.166667 1.823529 1.966667

# r still holds the ratio computed with the iris$ prefix; it matches the
# first values printed by the with() call.
head(r)

## [1] 1.457143 1.633333 1.468750 1.483871 1.388889 1.384615

# A braced block lets with() run several statements against iris.
with(iris, {
  # print() is needed: only the value of the block's last expression is
  # returned, so the summary would otherwise not be shown
  print(summary(Sepal.Length))
  plot(Sepal.Length, Sepal.Width)
  plot(Petal.Length, Petal.Width)
})

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   4.300   5.100   5.800   5.843   6.400   7.900

# The value of the last expression in the block (stats) is what with()
# returns, so the summary is printed at top level.
with(iris, {
  stats <- summary(Sepal.Length)
  stats
})

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   4.300   5.100   5.800   5.843   6.400   7.900

# stats   (left commented out: `stats` was assigned inside with() and is not defined here)

# Contrast `<-` and `<<-` inside with().
with(iris, {
  # `<-` creates a variable local to the with() evaluation; it is gone afterwards
  stats.nokeep <- summary(Sepal.Length)
  # `<<-` assigns outside the with() evaluation environment, so stats.keep
  # is still available after the call
  stats.keep <<- summary(Sepal.Length)
})
# stats.nokeep   (left commented out: not defined outside with())
stats.keep

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   4.300   5.100   5.800   5.843   6.400   7.900

iris$Sepal.Ratio <- iris$Sepal.Length / iris$Sepal.Width
head(iris)

##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species Sepal.Ratio
## 1          5.1         3.5          1.4         0.2  setosa    1.457143
## 2          4.9         3.0          1.4         0.2  setosa    1.633333
## 3          4.7         3.2          1.3         0.2  setosa    1.468750
## 4          4.6         3.1          1.5         0.2  setosa    1.483871
## 5          5.0         3.6          1.4         0.2  setosa    1.388889
## 6          5.4         3.9          1.7         0.4  setosa    1.384615

iris <- within(iris,
       Sepal.Ratio <- Sepal.Length / Sepal.Width)
head(iris)

##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species Sepal.Ratio
## 1          5.1         3.5          1.4         0.2  setosa    1.457143
## 2          4.9         3.0          1.4         0.2  setosa    1.633333
## 3          4.7         3.2          1.3         0.2  setosa    1.468750
## 4          4.6         3.1          1.5         0.2  setosa    1.483871
## 5          5.0         3.6          1.4         0.2  setosa    1.388889
## 6          5.4         3.9          1.7         0.4  setosa    1.384615

attach(), detach()

attach(iris)
search()

##  [1] ".GlobalEnv"        "iris"              "package:stats"    
##  [4] "package:graphics"  "package:grDevices" "package:utils"    
##  [7] "package:datasets"  "package:methods"   "Autoloads"        
## [10] "package:base"

r <- Sepal.Length / Sepal.Width
head(r)

## [1] 1.457143 1.633333 1.468750 1.483871 1.388889 1.384615

detach(iris)
search()

## [1] ".GlobalEnv"        "package:stats"     "package:graphics" 
## [4] "package:grDevices" "package:utils"     "package:datasets" 
## [7] "package:methods"   "Autoloads"         "package:base"

# Demonstrates an attach() pitfall: the bare name Sepal.Length keeps showing
# the values that were attached, even after iris$Sepal.Length is overwritten.
attach(iris)
iris$Sepal.Length <- 0
head(iris$Sepal.Length)

## [1] 0 0 0 0 0 0

head(Sepal.Length)

## [1] 5.1 4.9 4.7 4.6 5.0 5.4

detach(iris)

attach(iris)
Sepal.Width <- Sepal.Width * 10
head(Sepal.Width)

## [1] 35 30 32 31 36 39

ls()

##  [1] "a"                 "ary"               "b"                
##  [4] "c"                 "cha"               "city.distance"    
##  [7] "city.distance.mat" "colnames"          "cols1"            
## [10] "cols2"             "df1"               "df2"              
## [13] "df3"               "df4"               "df5"              
## [16] "eval"              "eval.factor"       "eval.ordered"     
## [19] "even"              "eventday"          "eventday.factor"  
## [22] "food"              "fruit"             "indices"          
## [25] "iris"              "k"                 "large.states"     
## [28] "lst"               "lst1"              "lst2"             
## [31] "lst3"              "lst4"              "lst5"             
## [34] "mat"               "month"             "mtx"              
## [37] "names"             "new.cols"          "new.rows"         
## [40] "num"               "number"            "odd"              
## [43] "p"                 "prime"             "product"          
## [46] "q"                 "r"                 "rainfall"         
## [49] "review"            "review.factor"     "rich.states"      
## [52] "rnames"            "Sepal.Width"       "sex"              
## [55] "sex.factor"        "states"            "stats.keep"       
## [58] "traffic.death"     "us.state"          "v"                
## [61] "v1"                "v2"                "v3"               
## [64] "values"            "w"                 "weekend"          
## [67] "worldcup1"         "worldcup2"         "y"                
## [70] "z"

head(iris$Sepal.Width)

## [1] 3.5 3.0 3.2 3.1 3.6 3.9

detach(iris)
rm(Sepal.Width)

# Demonstrates masking: a global variable with the same name as a column of
# an attached data frame wins the name lookup.
Sepal.Length <- c(4.5, 5.3, 6.7)
Sepal.Length

## [1] 4.5 5.3 6.7

attach(iris)

## The following object is masked _by_ .GlobalEnv:
## 
##     Sepal.Length

# plot(Sepal.Length, Sepal.Width)
# The bare name resolves to the 3-value global vector, not the iris column,
# as the masking message above reports.
Sepal.Length

## [1] 4.5 5.3 6.7

# Clean up so the demonstration does not leak into later sections: take iris
# off the search path again and drop the masking global.
detach(iris)
rm(Sepal.Length)

subset(), cor()

head(mtcars)

##                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1

subset(mtcars,
       subset = (mpg > 30),
       select = mpg)

##                 mpg
## Fiat 128       32.4
## Honda Civic    30.4
## Toyota Corolla 33.9
## Lotus Europa   30.4

# cyl is a numeric column in mtcars (see the head(mtcars) output above), so
# it is compared with the number 4; comparing it with a label string such as
# "4 cylinders" matches no rows.
subset(mtcars,
       subset = (cyl == 4 & am == 0),
       select = c(mpg, hp, wt))

##                mpg hp    wt
## Merc 240D     24.4 62 3.190
## Merc 230      22.8 95 3.150
## Toyota Corona 21.5 97 2.465

subset(mtcars,
       subset = (mpg > mean(mpg)),
       select = c(mpg, cyl, wt))

##                 mpg cyl    wt
## Mazda RX4      21.0   6 2.620
## Mazda RX4 Wag  21.0   6 2.875
## Datsun 710     22.8   4 2.320
## Hornet 4 Drive 21.4   6 3.215
## Merc 240D      24.4   4 3.190
## Merc 230       22.8   4 3.150
## Fiat 128       32.4   4 2.200
## Honda Civic    30.4   4 1.615
## Toyota Corolla 33.9   4 1.835
## Toyota Corona  21.5   4 2.465
## Fiat X1-9      27.3   4 1.935
## Porsche 914-2  26.0   4 2.140
## Lotus Europa   30.4   4 1.513
## Volvo 142E     21.4   4 2.780

head(USArrests)

##            Murder Assault UrbanPop Rape
## Alabama      13.2     236       58 21.2
## Alaska       10.0     263       48 44.5
## Arizona       8.1     294       80 31.0
## Arkansas      8.8     190       50 19.5
## California    9.0     276       91 40.6
## Colorado      7.9     204       78 38.7

cor(USArrests)

##              Murder   Assault   UrbanPop      Rape
## Murder   1.00000000 0.8018733 0.06957262 0.5635788
## Assault  0.80187331 1.0000000 0.25887170 0.6652412
## UrbanPop 0.06957262 0.2588717 1.00000000 0.4113412
## Rape     0.56357883 0.6652412 0.41134124 1.0000000

subset(USArrests, select = -UrbanPop)

##                Murder Assault Rape
## Alabama          13.2     236 21.2
## Alaska           10.0     263 44.5
## Arizona           8.1     294 31.0
## Arkansas          8.8     190 19.5
## California        9.0     276 40.6
## Colorado          7.9     204 38.7
## Connecticut       3.3     110 11.1
## Delaware          5.9     238 15.8
## Florida          15.4     335 31.9
## Georgia          17.4     211 25.8
## Hawaii            5.3      46 20.2
## Idaho             2.6     120 14.2
## Illinois         10.4     249 24.0
## Indiana           7.2     113 21.0
## Iowa              2.2      56 11.3
## Kansas            6.0     115 18.0
## Kentucky          9.7     109 16.3
## Louisiana        15.4     249 22.2
## Maine             2.1      83  7.8
## Maryland         11.3     300 27.8
## Massachusetts     4.4     149 16.3
## Michigan         12.1     255 35.1
## Minnesota         2.7      72 14.9
## Mississippi      16.1     259 17.1
## Missouri          9.0     178 28.2
## Montana           6.0     109 16.4
## Nebraska          4.3     102 16.5
## Nevada           12.2     252 46.0
## New Hampshire     2.1      57  9.5
## New Jersey        7.4     159 18.8
## New Mexico       11.4     285 32.1
## New York         11.1     254 26.1
## North Carolina   13.0     337 16.1
## North Dakota      0.8      45  7.3
## Ohio              7.3     120 21.4
## Oklahoma          6.6     151 20.0
## Oregon            4.9     159 29.3
## Pennsylvania      6.3     106 14.9
## Rhode Island      3.4     174  8.3
## South Carolina   14.4     279 22.5
## South Dakota      3.8      86 12.8
## Tennessee        13.2     188 26.9
## Texas            12.7     201 25.5
## Utah              3.2     120 22.9
## Vermont           2.2      48 11.2
## Virginia          8.5     156 20.7
## Washington        4.0     145 26.2
## West Virginia     5.7      81  9.3
## Wisconsin         2.6      53 10.8
## Wyoming           6.8     161 15.6

cor(subset(USArrests, select = -UrbanPop))

##            Murder   Assault      Rape
## Murder  1.0000000 0.8018733 0.5635788
## Assault 0.8018733 1.0000000 0.6652412
## Rape    0.5635788 0.6652412 1.0000000

cor(subset(USArrests, select = -c(UrbanPop, Rape)))

##            Murder   Assault
## Murder  1.0000000 0.8018733
## Assault 0.8018733 1.0000000

sqldf()

library(sqldf)

## Loading required package: gsubfn

## Loading required package: proto

## Loading required package: RSQLite

data("mtcars")
sqldf("select * from mtcars where mpg > 30", row.names = TRUE)

##                 mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Fiat 128       32.4   4 78.7  66 4.08 2.200 19.47  1  1    4    1
## Honda Civic    30.4   4 75.7  52 4.93 1.615 18.52  1  1    4    2
## Toyota Corolla 33.9   4 71.1  65 4.22 1.835 19.90  1  1    4    1
## Lotus Europa   30.4   4 95.1 113 3.77 1.513 16.90  1  1    5    2

# cyl is numeric in the freshly loaded mtcars, so filter on the number 6 with
# the standard SQL `=` operator; the string '6 cylinders' matches no rows.
sqldf("select * from mtcars where cyl = 6 order by mpg", row.names = TRUE)

##                 mpg cyl  disp  hp drat    wt  qsec vs am gear carb
## Merc 280C      17.8   6 167.6 123 3.92 3.440 18.90  1  0    4    4
## Valiant        18.1   6 225.0 105 2.76 3.460 20.22  1  0    3    1
## Merc 280       19.2   6 167.6 123 3.92 3.440 18.30  1  0    4    4
## Ferrari Dino   19.7   6 145.0 175 3.62 2.770 15.50  0  1    5    6
## Mazda RX4      21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag  21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4
## Hornet 4 Drive 21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1

sqldf("select avg(mpg) as avg_mpg, avg(wt) as avg_wt, gear from mtcars where carb in (4, 6) group by gear")

##   avg_mpg  avg_wt gear
## 1   12.62 4.68580    3
## 2   19.75 3.09375    4
## 3   17.75 2.97000    5

data(iris)
sqldf("select distinct Species from iris")

##      Species
## 1     setosa
## 2 versicolor
## 3  virginica

sqldf("select * from iris limit 3")

##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa

sqldf("select avg([Sepal.Length]) from iris where Species = 'setosa'")

##   avg([Sepal.Length])
## 1               5.006

sqldf('select avg("Sepal.Length") from iris where Species = "setosa"
      ')

##   avg("Sepal.Length")
## 1               5.006

텍스트

nchar(), strsplit(), paste(), outer()

x <- "We have a dream"
nchar(x)

## [1] 15

length(x)

## [1] 1

y <- c("we", "have", "a", "dream")
nchar(y)

## [1] 2 4 1 5

length(y)

## [1] 4

nchar(y[4])

## [1] 5

letters

##  [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o" "p" "q" "r" "s"
## [20] "t" "u" "v" "w" "x" "y" "z"

sort(letters, decreasing = TRUE)

##  [1] "z" "y" "x" "w" "v" "u" "t" "s" "r" "q" "p" "o" "n" "m" "l" "k" "j" "i" "h"
## [20] "g" "f" "e" "d" "c" "b" "a"

fox.says <- "It is only with the HEART"
tolower(fox.says)

## [1] "it is only with the heart"

toupper(fox.says)

## [1] "IT IS ONLY WITH THE HEART"

strsplit(fox.says, split = " ")

## [[1]]
## [1] "It"    "is"    "only"  "with"  "the"   "HEART"

strsplit(fox.says, split = "")

## [[1]]
##  [1] "I" "t" " " "i" "s" " " "o" "n" "l" "y" " " "w" "i" "t" "h" " " "t" "h" "e"
## [20] " " "H" "E" "A" "R" "T"

unlist(strsplit(fox.says, split = " "))

## [1] "It"    "is"    "only"  "with"  "the"   "HEART"

strsplit(fox.says, split = " ")[[1]][[3]]

## [1] "only"

unlist(strsplit(fox.says, split = " "))[[3]]

## [1] "only"

littleprince <- c(x, fox.says)
strsplit(littleprince, " ")

## [[1]]
## [1] "We"    "have"  "a"     "dream"
## 
## [[2]]
## [1] "It"    "is"    "only"  "with"  "the"   "HEART"

strsplit(littleprince, " ")

## [[1]]
## [1] "We"    "have"  "a"     "dream"
## 
## [[2]]
## [1] "It"    "is"    "only"  "with"  "the"   "HEART"

strsplit(littleprince, " ")[[2]]

## [1] "It"    "is"    "only"  "with"  "the"   "HEART"

strsplit(littleprince, " ")[[2]][[5]]

## [1] "the"

fox.says <- "It is only with the HEART it"
fox.says.word <- strsplit(fox.says, " ")[[1]]
unique(fox.says.word)

## [1] "It"    "is"    "only"  "with"  "the"   "HEART" "it"

unique(tolower(fox.says.word))

## [1] "it"    "is"    "only"  "with"  "the"   "heart"

paste("Everybody", "wants", "to", "fly")

## [1] "Everybody wants to fly"

paste(c("Everybody", "wants", "to", "fly"))

## [1] "Everybody" "wants"     "to"        "fly"

paste("Everybody", "wants", "to", "fly", sep="-")

## [1] "Everybody-wants-to-fly"

paste("Everybody", "wants", "to", "fly", sep="")

## [1] "Everybodywantstofly"

paste0("Everybody", "wants", "to", "fly")

## [1] "Everybodywantstofly"

paste(pi, sqrt(pi))

## [1] "3.14159265358979 1.77245385090552"

# Numbers are converted to text automatically; 25 * 1.8 + 32 is the
# Celsius-to-Fahrenheit conversion for 25 degrees.
paste("25 degrees Celsius is", 25 * 1.8 + 32, "degrees Fahrenheit")

## [1] "25 degrees Celsius is 77 degrees Fahrenheit"

heroes <- c("Batman", "Captain America", "Hulk")
colors <- c("Black", "Blue", "Green")
paste(heroes, colors)

## [1] "Batman Black"         "Captain America Blue" "Hulk Green"

paste("Type", 1:5)

## [1] "Type 1" "Type 2" "Type 3" "Type 4" "Type 5"

paste(heroes, "wants", "to", "fly")

## [1] "Batman wants to fly"          "Captain America wants to fly"
## [3] "Hulk wants to fly"

paste(c("Everybody", "wants", "to", "fly"))

## [1] "Everybody" "wants"     "to"        "fly"

paste(c("Everybody", "wants", "to", "fly"), collapse = " ")

## [1] "Everybody wants to fly"

paste(heroes, "wants", "to", "fly", collapse = ", and ")

## [1] "Batman wants to fly, and Captain America wants to fly, and Hulk wants to fly"

paste(month.abb, 1:12)

##  [1] "Jan 1"  "Feb 2"  "Mar 3"  "Apr 4"  "May 5"  "Jun 6"  "Jul 7"  "Aug 8" 
##  [9] "Sep 9"  "Oct 10" "Nov 11" "Dec 12"

paste(month.abb, 1:12, sep="_")

##  [1] "Jan_1"  "Feb_2"  "Mar_3"  "Apr_4"  "May_5"  "Jun_6"  "Jul_7"  "Aug_8" 
##  [9] "Sep_9"  "Oct_10" "Nov_11" "Dec_12"

paste(month.abb, 1:12, sep="_", collapse = "-")

## [1] "Jan_1-Feb_2-Mar_3-Apr_4-May_5-Jun_6-Jul_7-Aug_8-Sep_9-Oct_10-Nov_11-Dec_12"

outer(c(1, 2, 3), c(1, 2, 3))

##      [,1] [,2] [,3]
## [1,]    1    2    3
## [2,]    2    4    6
## [3,]    3    6    9

asian.countries <- c("Korea", "Japan","China")
info <- c("GDP", "Population", "Area")
outer(asian.countries, info, FUN = paste, sep = "-")

##      [,1]        [,2]               [,3]        
## [1,] "Korea-GDP" "Korea-Population" "Korea-Area"
## [2,] "Japan-GDP" "Japan-Population" "Japan-Area"
## [3,] "China-GDP" "China-Population" "China-Area"

x <- outer(asian.countries, asian.countries, FUN = paste, sep = "-")
x[!lower.tri(x)]

## [1] "Korea-Korea" "Korea-Japan" "Japan-Japan" "Korea-China" "Japan-China"
## [6] "China-China"

sprintf()

customer <- "Jobs"
buysize <- 10
deliveryday <- 3
# paste0() joins the pieces with no separator, so every space is written
# explicitly inside the literals (including the one before "day(s)").
paste0("Hello ", customer, ", your order of ", buysize,
       " product(s) will be delivered within ", deliveryday,
       " day(s)")

## [1] "Hello Jobs, your order of 10 product(s) will be delivered within 3 day(s)"

# The format string is assembled from two literals so that the line break in
# the source and its indentation do not end up inside the message.
sprintf(paste0("Hello %s your order of %s product(s) will be ",
               "delivered within %s day(s)"),
        customer, buysize, deliveryday)

## [1] "Hello Jobs your order of 10 product(s) will be delivered within 3 day(s)"

# sprintf() is vectorised: element i of each argument fills message i.
customer <- c("Jobs", "Gates", "Bezos")
buysize <- c(10, 7, 12)
deliveryday <- c(3, 2, 7.5)
# The format string is assembled from two literals so that the line break in
# the source and its indentation do not end up inside the messages.
sprintf(paste0("Hello %s your order of %s product(s) will be ",
               "delivered within %s day(s)"),
        customer, buysize, deliveryday)

## [1] "Hello Jobs your order of 10 product(s) will be delivered within 3 day(s)"   
## [2] "Hello Gates your order of 7 product(s) will be delivered within 2 day(s)"   
## [3] "Hello Bezos your order of 12 product(s) will be delivered within 7.5 day(s)"

substr(), substring(), grep(), sub()

substr("Data Analytics", start = 1, stop = 4)

## [1] "Data"

substr("Data Analytics", start = 6, stop = 14)

## [1] "Analytics"

substring("Data Analytics", 6)

## [1] "Analytics"

class <- c("Data Analytics", "Data Mining", "Data Visualization")
substr(class, 1, 4)

## [1] "Data" "Data" "Data"

# Extract the trailing two-letter code: nchar() gives each string's length,
# so the substring runs from the second-to-last character to the last one.
countries <- c("Korea, KR", "United States, US", "China, CN")
substr(countries, nchar(countries) - 1, nchar(countries))

## [1] "KR" "US" "CN"

head(islands)

##       Africa   Antarctica         Asia    Australia Axel Heiberg       Baffin 
##        11506         5500        16988         2968           16          184

landmasses <- names(islands)
index <- grep(pattern = "New", x = landmasses)
landmasses[index]

## [1] "New Britain"     "New Guinea"      "New Zealand (N)" "New Zealand (S)"
## [5] "Newfoundland"

grep(pattern = "New", x = landmasses, value = TRUE)

## [1] "New Britain"     "New Guinea"      "New Zealand (N)" "New Zealand (S)"
## [5] "Newfoundland"

landmasses[grep(pattern = " ", landmasses)]

##  [1] "Axel Heiberg"     "New Britain"      "New Guinea"       "New Zealand (N)" 
##  [5] "New Zealand (S)"  "North America"    "Novaya Zemlya"    "Prince of Wales" 
##  [9] "South America"    "Tierra del Fuego"

grep(" ", landmasses, value = TRUE)

##  [1] "Axel Heiberg"     "New Britain"      "New Guinea"       "New Zealand (N)" 
##  [5] "New Zealand (S)"  "North America"    "Novaya Zemlya"    "Prince of Wales" 
##  [9] "South America"    "Tierra del Fuego"

fox.says <- "It is only with the HEART that is"
sub(pattern = "is", replacement = "was", x = fox.says)

## [1] "It was only with the HEART that is"

gsub(pattern = "is", replacement = "was", x = fox.says)

## [1] "It was only with the HEART that was"

x <- c("product.csv", "customer.csv", "supplier.csv")
# The dot is escaped and the pattern anchored with `$`, so only a literal
# ".csv" extension at the end of the name is removed; an unescaped "." would
# match any character and could hit e.g. "_csv" in the middle of a name.
sub(pattern = "\\.csv$", replacement = "", x = x)

## [1] "product"  "customer" "supplier"

정규표현식

[:digit:] : [0-9]
[:lower:] : [a-z]
[:upper:] : [A-Z]
[:alpha:] : [A-Za-z]
[:alnum:] : [A-Za-z0-9]
[:punct:] : 문장부호
[:blank:] : space, tab
[:space:] : space, tab, newline, form feed, carriage return
[:print:] : [[:alnum:][:punct:] ] (문자, 숫자, 문장부호, 공백)
[:graph:] : 그래프 문자(읽을 수 있는 문자)

? : 0~1회
* : 0회 이상
+ : 1회 이상
{n} : n회 반복
{n,} : n회 이상 반복
{n,m} : n회~m회 반복

\w : [[:alnum:]_] 단어 문자
\W : [^[:alnum:]_] 단어 문자를 제외한 문자
\d : [[:digit:]] 숫자
\D : [^[:digit:]] 숫자를 제외한 문자
\s : [[:space:]] 스페이스 문자
\S : [^[:space:]] 스페이스 문자를 제외한 문자
\b : 단어 경계의 빈 문자열
\B : 단어 경계가 아닌 위치의 빈 문자열
\< : 단어 시작
\> : 단어 끝

words <- c("at", "bat", "cat", "chaenomelss", "chase", "chasse",
           "cheap", "check", "cheese", "chick", "hat")
grep("che", words, value = TRUE)

## [1] "cheap"  "check"  "cheese"

grep("at", words, value = TRUE)

## [1] "at"  "bat" "cat" "hat"

grep("[ch]", words, value = TRUE)

## [1] "cat"         "chaenomelss" "chase"       "chasse"      "cheap"      
## [6] "check"       "cheese"      "chick"       "hat"

grep("[at]", words, value = TRUE)

## [1] "at"          "bat"         "cat"         "chaenomelss" "chase"      
## [6] "chasse"      "cheap"       "hat"

grep("ch|at", words, value = TRUE)

##  [1] "at"          "bat"         "cat"         "chaenomelss" "chase"      
##  [6] "chasse"      "cheap"       "check"       "cheese"      "chick"      
## [11] "hat"

grep("ch(e|i)ck", words, value = TRUE)

## [1] "check" "chick"

grep("chas?e", words, value = TRUE)

## [1] "chaenomelss" "chase"

grep("chas*e", words, value = TRUE)

## [1] "chaenomelss" "chase"       "chasse"

grep("chas+e", words, value = TRUE)

## [1] "chase"  "chasse"

grep("ch(a*|e*)s+e", words, value = TRUE)

## [1] "chase"  "chasse" "cheese"

grep("^c", words, value = TRUE)

## [1] "cat"         "chaenomelss" "chase"       "chasse"      "cheap"      
## [6] "check"       "cheese"      "chick"

grep("t$", words, value = TRUE)

## [1] "at"  "bat" "cat" "hat"

grep("^c.t$", words, value = TRUE)

## [1] "cat"

grep("^[hc]?at", words, value = TRUE)

## [1] "at"  "cat" "hat"

words2 <- c("12 Dec", "OK", "http://", "<TITLE>Time?</TITLE>",
            "12345", "Hi there")
grep("[[:alnum:]]", words2, value = TRUE)

## [1] "12 Dec"               "OK"                   "http://"             
## [4] "<TITLE>Time?</TITLE>" "12345"                "Hi there"

grep("[[:alpha:]]", words2, value = TRUE)

## [1] "12 Dec"               "OK"                   "http://"             
## [4] "<TITLE>Time?</TITLE>" "Hi there"

grep("[[:digit:]]", words2, value = TRUE)

## [1] "12 Dec" "12345"

grep("[[:punct:]]", words2, value = TRUE)

## [1] "http://"              "<TITLE>Time?</TITLE>"

grep("[[:space:]]", words2, value = TRUE)

## [1] "12 Dec"   "Hi there"

grep("\\w+", words2, value = TRUE)

## [1] "12 Dec"               "OK"                   "http://"             
## [4] "<TITLE>Time?</TITLE>" "12345"                "Hi there"

grep("\\d+", words2, value = TRUE)

## [1] "12 Dec" "12345"

grep("\\s+", words2, value = TRUE)

## [1] "12 Dec"   "Hi there"

library(base)

grep(), grepl()

string <- c("data analytics in useful",
            "business analytics is helpful",
            "visualization of data is interesting for data scientists")

grep(pattern = "data", x = string)

## [1] 1 3

grep(pattern = "data", x = string, value = TRUE)

## [1] "data analytics in useful"                                
## [2] "visualization of data is interesting for data scientists"

string[grep(pattern = "data", x = string)]

## [1] "data analytics in useful"                                
## [2] "visualization of data is interesting for data scientists"

grep("useful|helpful", string, value = TRUE)

## [1] "data analytics in useful"      "business analytics is helpful"

grep("useful|helpful", string, value = TRUE, invert = TRUE)

## [1] "visualization of data is interesting for data scientists"

grepl(pattern = "data", x = string)

## [1]  TRUE FALSE  TRUE

state.name

##  [1] "Alabama"        "Alaska"         "Arizona"        "Arkansas"      
##  [5] "California"     "Colorado"       "Connecticut"    "Delaware"      
##  [9] "Florida"        "Georgia"        "Hawaii"         "Idaho"         
## [13] "Illinois"       "Indiana"        "Iowa"           "Kansas"        
## [17] "Kentucky"       "Louisiana"      "Maine"          "Maryland"      
## [21] "Massachusetts"  "Michigan"       "Minnesota"      "Mississippi"   
## [25] "Missouri"       "Montana"        "Nebraska"       "Nevada"        
## [29] "New Hampshire"  "New Jersey"     "New Mexico"     "New York"      
## [33] "North Carolina" "North Dakota"   "Ohio"           "Oklahoma"      
## [37] "Oregon"         "Pennsylvania"   "Rhode Island"   "South Carolina"
## [41] "South Dakota"   "Tennessee"      "Texas"          "Utah"          
## [45] "Vermont"        "Virginia"       "Washington"     "West Virginia" 
## [49] "Wisconsin"      "Wyoming"

grepl("new", state.name, ignore.case = TRUE)

##  [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [25] FALSE FALSE FALSE FALSE  TRUE  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE
## [37] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [49] FALSE FALSE

state.name[grepl("new", state.name, ignore.case = TRUE)]

## [1] "New Hampshire" "New Jersey"    "New Mexico"    "New York"

sum(grepl("new", state.name, ignore.case = TRUE))

## [1] 4

regexpr(), gregexpr(), regmatches()

regexpr(pattern = "data", text = string)

## [1]  1 -1 18
## attr(,"match.length")
## [1]  4 -1  4
## attr(,"index.type")
## [1] "chars"
## attr(,"useBytes")
## [1] TRUE

gregexpr(pattern = "data", text = string)

## [[1]]
## [1] 1
## attr(,"match.length")
## [1] 4
## attr(,"index.type")
## [1] "chars"
## attr(,"useBytes")
## [1] TRUE
## 
## [[2]]
## [1] -1
## attr(,"match.length")
## [1] -1
## attr(,"index.type")
## [1] "chars"
## attr(,"useBytes")
## [1] TRUE
## 
## [[3]]
## [1] 18 42
## attr(,"match.length")
## [1] 4 4
## attr(,"index.type")
## [1] "chars"
## attr(,"useBytes")
## [1] TRUE

regmatches(x = string, m = regexpr(pattern = "data", text = string))

## [1] "data" "data"

regmatches(x = string, m = gregexpr(pattern = "data", text = string))

## [[1]]
## [1] "data"
## 
## [[2]]
## character(0)
## 
## [[3]]
## [1] "data" "data"

regmatches(x = string,
           m = gregexpr(pattern = "data", text = string),
           invert = TRUE)

## [[1]]
## [1] ""                     " analytics in useful"
## 
## [[2]]
## [1] "business analytics is helpful"
## 
## [[3]]
## [1] "visualization of "    " is interesting for " " scientists"

sub(), gsub(), strsplit()

sub(pattern = "data", replacement = "text", x = string)

## [1] "text analytics in useful"                                
## [2] "business analytics is helpful"                           
## [3] "visualization of text is interesting for data scientists"

gsub(pattern = "data", replacement = "text", x = string)

## [1] "text analytics in useful"                                
## [2] "business analytics is helpful"                           
## [3] "visualization of text is interesting for text scientists"

strsplit(x = string, split = " ")

## [[1]]
## [1] "data"      "analytics" "in"        "useful"   
## 
## [[2]]
## [1] "business"  "analytics" "is"        "helpful"  
## 
## [[3]]
## [1] "visualization" "of"            "data"          "is"           
## [5] "interesting"   "for"           "data"          "scientists"

unlist(strsplit(x = string, split = " "))

##  [1] "data"          "analytics"     "in"            "useful"       
##  [5] "business"      "analytics"     "is"            "helpful"      
##  [9] "visualization" "of"            "data"          "is"           
## [13] "interesting"   "for"           "data"          "scientists"

unique(unlist(strsplit(x = string, split = " ")))

##  [1] "data"          "analytics"     "in"            "useful"       
##  [5] "business"      "is"            "helpful"       "visualization"
##  [9] "of"            "interesting"   "for"           "scientists"

library(stringr)

str_detect()

string <- c("data analytics in useful",
            "business analytics is helpful",
            "visualization of data is interesting for data scientists")
library(stringr)

## 
## Attaching package: 'stringr'

## The following objects are masked _by_ '.GlobalEnv':
## 
##     fruit, words

str_detect(string = string, pattern = "data")

## [1]  TRUE FALSE  TRUE

str_detect(string = string, pattern = "DATA")

## [1] FALSE FALSE FALSE

str_detect(string = string, fixed(pattern = "DATA", ignore_case = TRUE))

## [1]  TRUE FALSE  TRUE

str_detect(c("abz", "ayz", "a.z"), "a.z")

## [1] TRUE TRUE TRUE

str_detect(c("abz", "ayz", "a.z"), fixed("a.z"))

## [1] FALSE FALSE  TRUE

str_detect(c("abz", "ayz", "a.z"), "a\\.z")

## [1] FALSE FALSE  TRUE

str_locate(), str_locate_all(), str_extract(), str_extract_all()

str_locate(string, "data")

##      start end
## [1,]     1   4
## [2,]    NA  NA
## [3,]    18  21

str_locate_all(string, "data")

## [[1]]
##      start end
## [1,]     1   4
## 
## [[2]]
##      start end
## 
## [[3]]
##      start end
## [1,]    18  21
## [2,]    42  45

str_extract(string, "data")

## [1] "data" NA     "data"

str_extract_all(string, "data")

## [[1]]
## [1] "data"
## 
## [[2]]
## character(0)
## 
## [[3]]
## [1] "data" "data"

str_extract_all(string, "data", simplify = TRUE)

##      [,1]   [,2]  
## [1,] "data" ""    
## [2,] ""     ""    
## [3,] "data" "data"

unlist(str_extract_all(string, "data"))

## [1] "data" "data" "data"

str_match(), str_match_all(), str_replace(), str_replace_all()

sentences5 <- sentences[1:5]
sentences5

## [1] "The birch canoe slid on the smooth planks." 
## [2] "Glue the sheet to the dark blue background."
## [3] "It's easy to tell the depth of a well."     
## [4] "These days a chicken leg is a rare dish."   
## [5] "Rice is often served in round bowls."

str_extract(sentences5, "(a|A|th|the) (\\w+)")

## [1] "the smooth" "the sheet"  "the depth"  "a chicken"  NA

str_match(sentences5, "(a|A|th|the) (\\w+)")

##      [,1]         [,2]  [,3]     
## [1,] "the smooth" "the" "smooth" 
## [2,] "the sheet"  "the" "sheet"  
## [3,] "the depth"  "the" "depth"  
## [4,] "a chicken"  "a"   "chicken"
## [5,] NA           NA    NA

str_match_all(sentences5, "(a|A|th|the) (\\w+)")

## [[1]]
##      [,1]         [,2]  [,3]    
## [1,] "the smooth" "the" "smooth"
## 
## [[2]]
##      [,1]        [,2]  [,3]   
## [1,] "the sheet" "the" "sheet"
## [2,] "the dark"  "the" "dark" 
## 
## [[3]]
##      [,1]        [,2]  [,3]   
## [1,] "the depth" "the" "depth"
## [2,] "a well"    "a"   "well" 
## 
## [[4]]
##      [,1]        [,2] [,3]     
## [1,] "a chicken" "a"  "chicken"
## [2,] "a rare"    "a"  "rare"   
## 
## [[5]]
##      [,1] [,2] [,3]

str_replace(string = string, pattern = "data", replacement = "text")

## [1] "text analytics in useful"                                
## [2] "business analytics is helpful"                           
## [3] "visualization of text is interesting for data scientists"

str_replace_all(string = string, pattern = "data", replacement = "text")

## [1] "text analytics in useful"                                
## [2] "business analytics is helpful"                           
## [3] "visualization of text is interesting for text scientists"

str_split(), str_length(), str_count()

str_split(string, " ")

## [[1]]
## [1] "data"      "analytics" "in"        "useful"   
## 
## [[2]]
## [1] "business"  "analytics" "is"        "helpful"  
## 
## [[3]]
## [1] "visualization" "of"            "data"          "is"           
## [5] "interesting"   "for"           "data"          "scientists"

unlist(str_split(string, " "))

##  [1] "data"          "analytics"     "in"            "useful"       
##  [5] "business"      "analytics"     "is"            "helpful"      
##  [9] "visualization" "of"            "data"          "is"           
## [13] "interesting"   "for"           "data"          "scientists"

unique(unlist(str_split(string, " ")))

##  [1] "data"          "analytics"     "in"            "useful"       
##  [5] "business"      "is"            "helpful"       "visualization"
##  [9] "of"            "interesting"   "for"           "scientists"

str_split(string, " ", n = 3)

## [[1]]
## [1] "data"      "analytics" "in useful"
## 
## [[2]]
## [1] "business"   "analytics"  "is helpful"
## 
## [[3]]
## [1] "visualization"                          
## [2] "of"                                     
## [3] "data is interesting for data scientists"

str_split(string, " ", n = 3, simplify = TRUE)

##      [,1]            [,2]        [,3]                                     
## [1,] "data"          "analytics" "in useful"                              
## [2,] "business"      "analytics" "is helpful"                             
## [3,] "visualization" "of"        "data is interesting for data scientists"

str_length(string)

## [1] 24 29 56

str_count(string, "data")

## [1] 1 0 2

str_count(string, "\\w+")

## [1] 4 4 8

str_pad(), str_trim()

str_pad(string = c("a", "abc", "abcde"),
        width = 6,
        side = "left",
        pad = " ")

## [1] "     a" "   abc" " abcde"

mon <- 1:12
str_pad(mon, width = 2, side = "left", pad = "0")

##  [1] "01" "02" "03" "04" "05" "06" "07" "08" "09" "10" "11" "12"

string <- c("data analytics in useful",
            "business analytics is helpful",
            "visualization of data is interesting for data scientists")
str.pad <- str_pad(string,
                   width = max(str_length(string)),
                   side = "both",
                   pad = " ")
str.pad

## [1] "                data analytics in useful                "
## [2] "             business analytics is helpful              "
## [3] "visualization of data is interesting for data scientists"

str_trim(str.pad, side = "both")

## [1] "data analytics in useful"                                
## [2] "business analytics is helpful"                           
## [3] "visualization of data is interesting for data scientists"

str_c()

str_c("data", "mining", sep = " ")

## [1] "data mining"

str.mining <- str_c(c("data mining", "text mining"),
                    "is useful",
                    sep = " ")
str.mining

## [1] "data mining is useful" "text mining is useful"

str_c(str.mining, collapse = "; ")

## [1] "data mining is useful; text mining is useful"

str_c(str.mining, collapse = "\n")

## [1] "data mining is useful\ntext mining is useful"

cat(str_c(str.mining, collapse = "\n"))

## data mining is useful
## text mining is useful

str_sub(string = str.mining, start = 1, end = 4)

## [1] "data" "text"

str_sub(str.mining, 5, 5)

## [1] " " " "

str_sub(str.mining, 5, 5) <- "-"
str.mining

## [1] "data-mining is useful" "text-mining is useful"

str_sub("abcdefg", start = -2)

## [1] "fg"

str_sub("abcdefg", end = -3)

## [1] "abcde"

파일 읽기

Sample Files : https://github.com/kykwahk/YouTube

read.csv(), read.table(), read.fwf()

# list.files("rBasicLec")
# library(pander)
# openFileInOS("C:/Users/jacea/workspaceR/RPubs/rBasicLec/product.csv")

read.csv("rBasicLec/product.csv")

##     id     name price
## 1 A001    Mouse 30000
## 2 A002 Keyboard 90000
## 3 A003      USB 50000

read.csv("rBasicLec/product-with-no-header.csv", header = FALSE)

##     V1       V2    V3
## 1 A001    Mouse 30000
## 2 A002 Keyboard 90000
## 3 A003      USB 50000

p <- read.csv("rBasicLec/product.csv")
str(p)

## 'data.frame':    3 obs. of  3 variables:
##  $ id   : chr  "A001" "A002" "A003"
##  $ name : chr  "Mouse" "Keyboard" "USB"
##  $ price: int  30000 90000 50000

read.table("rBasicLec/product.txt")

##     V1       V2    V3
## 1   id     name price
## 2 A001    Mouse 30000
## 3 A002 Keyboard 90000
## 4 A003      USB 50000

p <- read.table("rBasicLec/product.txt", header = TRUE)
str(p)

## 'data.frame':    3 obs. of  3 variables:
##  $ id   : chr  "A001" "A002" "A003"
##  $ name : chr  "Mouse" "Keyboard" "USB"
##  $ price: int  30000 90000 50000

p <- read.table("rBasicLec/product.txt",
                header = TRUE, stringsAsFactors = FALSE)
str(p)

## 'data.frame':    3 obs. of  3 variables:
##  $ id   : chr  "A001" "A002" "A003"
##  $ name : chr  "Mouse" "Keyboard" "USB"
##  $ price: int  30000 90000 50000

p <- read.table("rBasicLec/product-colon.txt",
                sep = ":",
                header = TRUE,
                stringsAsFactors = FALSE)
str(p)

## 'data.frame':    3 obs. of  3 variables:
##  $ id   : chr  "A001" "A002" "A003"
##  $ name : chr  " Mouse" " Keyboard" " USB"
##  $ price: int  30000 90000 50000

p <- read.table("rBasicLec/product-missing.txt",
                header = TRUE)
str(p)

## 'data.frame':    3 obs. of  3 variables:
##  $ id   : chr  "A001" "A002" "A003"
##  $ name : chr  "Mouse" "Keyboard" "USB"
##  $ price: chr  "30000" "." "50000"

p <- read.table("rBasicLec/product-missing.txt",
                header = TRUE,
                na.strings = ".")
str(p)

## 'data.frame':    3 obs. of  3 variables:
##  $ id   : chr  "A001" "A002" "A003"
##  $ name : chr  "Mouse" "Keyboard" "USB"
##  $ price: int  30000 NA 50000

# Fixed-width layout passed to read.fwf(): 4 characters for id, one skipped
# character (the negative width), 10 for name and 8 for price.
fwf_widths <- c(4, -1, 10, 8)
p <- read.fwf(
  "rBasicLec/product-fwf.txt",
  widths = fwf_widths,
  col.names = c("id", "name", "price")
)
str(p)

## 'data.frame':    3 obs. of  3 variables:
##  $ id   : chr  "A001" "A002" "A003"
##  $ name : chr  "Mouse     " "Keyboard  " "USB       "
##  $ price: int  30000 90000 50000

readLines(), scan()

readLines("rBasicLec/won-dollar.txt")

## [1] "2014-11-27 1116.70 1078.30 2014-11-28 1127.89 1089.11"                           
## [2] "2014-12-01 1130.13 1091.27 2014-12-02 1130.13 1091.27 2014-12-03 1131.86 1092.94"
## [3] "2014-12-04 1134.51 1095.49"                                                      
## [4] "2014-12-05 1134.51 1095.49 2014-12-08 1139.60 1100.40"                           
## [5] "2014-12-09 1134.51 1095.49 2014-12-10 1121.79 1083.21"

readLines("rBasicLec/won-dollar.txt", n = 2)

## [1] "2014-11-27 1116.70 1078.30 2014-11-28 1127.89 1089.11"                           
## [2] "2014-12-01 1130.13 1091.27 2014-12-02 1130.13 1091.27 2014-12-03 1131.86 1092.94"

scan("rBasicLec/won-dollar.txt", what = character())

##  [1] "2014-11-27" "1116.70"    "1078.30"    "2014-11-28" "1127.89"   
##  [6] "1089.11"    "2014-12-01" "1130.13"    "1091.27"    "2014-12-02"
## [11] "1130.13"    "1091.27"    "2014-12-03" "1131.86"    "1092.94"   
## [16] "2014-12-04" "1134.51"    "1095.49"    "2014-12-05" "1134.51"   
## [21] "1095.49"    "2014-12-08" "1139.60"    "1100.40"    "2014-12-09"
## [26] "1134.51"    "1095.49"    "2014-12-10" "1121.79"    "1083.21"

scan("rBasicLec/won-dollar.txt",
     what = list(character(),
                 numeric(),
                 numeric()))

## [[1]]
##  [1] "2014-11-27" "2014-11-28" "2014-12-01" "2014-12-02" "2014-12-03"
##  [6] "2014-12-04" "2014-12-05" "2014-12-08" "2014-12-09" "2014-12-10"
## 
## [[2]]
##  [1] 1116.70 1127.89 1130.13 1130.13 1131.86 1134.51 1134.51 1139.60 1134.51
## [10] 1121.79
## 
## [[3]]
##  [1] 1078.30 1089.11 1091.27 1091.27 1092.94 1095.49 1095.49 1100.40 1095.49
## [10] 1083.21

scan("rBasicLec/won-dollar.txt",
     what = list(date = character(),
                 buy = numeric(),
                 sell = numeric()),
     nlines = 2)

## $date
## [1] "2014-11-27" "2014-11-28" "2014-12-01" "2014-12-02" "2014-12-03"
## 
## $buy
## [1] 1116.70 1127.89 1130.13 1130.13 1131.86
## 
## $sell
## [1] 1078.30 1089.11 1091.27 1091.27 1092.94

scan("rBasicLec/won-dollar.txt",
     what = list(date = character(),
                 buy = numeric(),
                 sell = numeric()),
     skip = 3)

## $date
## [1] "2014-12-05" "2014-12-08" "2014-12-09" "2014-12-10"
## 
## $buy
## [1] 1134.51 1139.60 1134.51 1121.79
## 
## $sell
## [1] 1095.49 1100.40 1095.49 1083.21

library(openxlsx)

library(openxlsx)

read.xlsx("rBasicLec/product.xlsx",
          colNames = TRUE,
          sheet = 1)

##     id     name price madein
## 1 A001    Mouse 30000     KR
## 2 A002 Keyboard 90000     CN
## 3 A003      USB 50000     US

날짜

Sys.Date(), date(), Sys.time(), weekdays()

Sys.Date()

## [1] "2020-07-31"

class(Sys.Date())

## [1] "Date"

date()

## [1] "Fri Jul 31 23:30:28 2020"

class(date())

## [1] "character"

Sys.time()

## [1] "2020-07-31 23:30:28 KST"

class(Sys.time())

## [1] "POSIXct" "POSIXt"

as.Date("2025-12-31")

## [1] "2025-12-31"

as.Date("2020/11/02")

## [1] "2020-11-02"

# Parse a month/day/year string. The pattern goes in the named `format`
# argument; wrapping it in a call to format() only worked because format()
# returns a character string unchanged and the value happened to land in the
# right positional slot.
as.Date("12/3/2021", format = "%m/%d/%Y")

## [1] "2021-12-03"

# ?strptime

d <- as.Date("2025-12-31")
format(d, format = "%m/%d/%Y")

## [1] "12/31/2025"

today <- Sys.Date()
format(today, format = "%Y/%m/%d")

## [1] "2020/07/31"

format(today, format = "%Y/%m/%d %A")

## [1] "2020/07/31 금요일"

format(today, format = "%Y/%m/%d %a")

## [1] "2020/07/31 금"

d <- as.Date("2025-12-31")
weekdays(d)

## [1] "수요일"

d + 7

## [1] "2026-01-07"

d + 1:7

## [1] "2026-01-01" "2026-01-02" "2026-01-03" "2026-01-04" "2026-01-05"
## [6] "2026-01-06" "2026-01-07"

weekdays(d + 1:7)

## [1] "목요일" "금요일" "토요일" "일요일" "월요일" "화요일" "수요일"

start <- as.Date("2025-01-01")
end <- as.Date("2025-01-31")
seq(from = start, to = end, by = 1)

##  [1] "2025-01-01" "2025-01-02" "2025-01-03" "2025-01-04" "2025-01-05"
##  [6] "2025-01-06" "2025-01-07" "2025-01-08" "2025-01-09" "2025-01-10"
## [11] "2025-01-11" "2025-01-12" "2025-01-13" "2025-01-14" "2025-01-15"
## [16] "2025-01-16" "2025-01-17" "2025-01-18" "2025-01-19" "2025-01-20"
## [21] "2025-01-21" "2025-01-22" "2025-01-23" "2025-01-24" "2025-01-25"
## [26] "2025-01-26" "2025-01-27" "2025-01-28" "2025-01-29" "2025-01-30"
## [31] "2025-01-31"

seq(from = start, by = 1, length.out = 7)

## [1] "2025-01-01" "2025-01-02" "2025-01-03" "2025-01-04" "2025-01-05"
## [6] "2025-01-06" "2025-01-07"

seq(from = start, by = "7 days", length.out = 7)

## [1] "2025-01-01" "2025-01-08" "2025-01-15" "2025-01-22" "2025-01-29"
## [6] "2025-02-05" "2025-02-12"

seq(from = start, by = "week", length.out = 7)

## [1] "2025-01-01" "2025-01-08" "2025-01-15" "2025-01-22" "2025-01-29"
## [6] "2025-02-05" "2025-02-12"

seq(from = start, by = "month", length.out = 12)

##  [1] "2025-01-01" "2025-02-01" "2025-03-01" "2025-04-01" "2025-05-01"
##  [6] "2025-06-01" "2025-07-01" "2025-08-01" "2025-09-01" "2025-10-01"
## [11] "2025-11-01" "2025-12-01"

seq(from = start, by = "3 months", length.out = 4)

## [1] "2025-01-01" "2025-04-01" "2025-07-01" "2025-10-01"

seq(from = start, by = "year", length.out = 10)

##  [1] "2025-01-01" "2026-01-01" "2027-01-01" "2028-01-01" "2029-01-01"
##  [6] "2030-01-01" "2031-01-01" "2032-01-01" "2033-01-01" "2034-01-01"

seq(from = as.Date("2025-01-30"),
    by = "month",
    length.out = 6)

## [1] "2025-01-30" "2025-03-02" "2025-03-30" "2025-04-30" "2025-05-30"
## [6] "2025-06-30"

months(), quarters(), Sys.setlocale()

start <- as.Date("2025-01-01")
qrt <- seq(from = start, by = "3 months", length.out = 4)
start

## [1] "2025-01-01"

qrt

## [1] "2025-01-01" "2025-04-01" "2025-07-01" "2025-10-01"

months(qrt)

## [1] "1월"  "4월"  "7월"  "10월"

quarters(qrt)

## [1] "Q1" "Q2" "Q3" "Q4"

Sys.getlocale()

## [1] "LC_COLLATE=Korean_Korea.949;LC_CTYPE=Korean_Korea.949;LC_MONETARY=Korean_Korea.949;LC_NUMERIC=C;LC_TIME=Korean_Korea.949"

Sys.setlocale("LC_TIME", "C")

## [1] "C"

months(qrt)

## [1] "January" "April"   "July"    "October"

Sys.setlocale("LC_TIME", "Korean_Korea.949")

## [1] "Korean_Korea.949"

months(qrt)

## [1] "1월"  "4월"  "7월"  "10월"

Sys.setlocale()

## [1] "LC_COLLATE=Korean_Korea.949;LC_CTYPE=Korean_Korea.949;LC_MONETARY=Korean_Korea.949;LC_NUMERIC=C;LC_TIME=Korean_Korea.949"

as.POSIXct(), as.POSIXlt(), strptime()

# POSIXct keeps a date-time as a single number (see as.integer(pct) below).
# The parsing pattern is supplied through the named `format` argument rather
# than a stray call to format(), so it no longer depends on positional
# matching inside as.POSIXct().
pct <- as.POSIXct("2025/03/15, 15:03:02",
                  format = "%Y/%m/%d, %H:%M:%S",
                  tz = "Asia/Seoul")
pct

## [1] "2025-03-15 15:03:02 KST"

class(pct)

## [1] "POSIXct" "POSIXt"

as.integer(pct)

## [1] 1742018582

# POSIXlt keeps a date-time as a list of components (see unclass(plt) below).
# The parsing pattern is supplied through the named `format` argument rather
# than a stray call to format().
plt <- as.POSIXlt("2025/03/15, 15:03:02",
                  format = "%Y/%m/%d, %H:%M:%S",
                  tz = "Asia/Seoul")
plt

## [1] "2025-03-15 15:03:02 KST"

class(plt)

## [1] "POSIXlt" "POSIXt"

as.integer(plt)

## Warning: 강제형변환에 의해 생성된 NA 입니다

##  [1]   2   3  15  15   2 125   6  73   0  NA  NA

unclass(plt)

## $sec
## [1] 2
## 
## $min
## [1] 3
## 
## $hour
## [1] 15
## 
## $mday
## [1] 15
## 
## $mon
## [1] 2
## 
## $year
## [1] 125
## 
## $wday
## [1] 6
## 
## $yday
## [1] 73
## 
## $isdst
## [1] 0
## 
## $zone
## [1] "KST"
## 
## $gmtoff
## [1] NA
## 
## attr(,"tzone")
## [1] "Asia/Seoul"

plt$mday

## [1] 15

plt$mon

## [1] 2

plt$year

## [1] 125

plt$wday

## [1] 6

plt$hour

## [1] 15

dposix <- as.Date("2025-12-31")
dposix

## [1] "2025-12-31"

as.POSIXlt(dposix)$wday

## [1] 3

as.POSIXlt(dposix)$yday

## [1] 364

as.POSIXlt(dposix)$year + 1900

## [1] 2025

as.POSIXlt(dposix)$mon + 1

## [1] 12

strptime("2025-12-31", format="%Y-%m-%d")

## [1] "2025-12-31 KST"

class(strptime("2025-12-31", format="%Y-%m-%d"))

## [1] "POSIXlt" "POSIXt"

strptime("2025-12-31", format="%Y-%m-%d")$year + 1900

## [1] 2025

format(), ISOdate(), difftime()

# Time of the Apollo moon landing as a POSIXct value in UTC.
# The parsing pattern is passed via the named `format` argument; the earlier
# format("...") call merely returned the string and relied on positional
# matching to reach the right parameter.
moon <- as.POSIXct("1969/07/20, 20:17:39",
                   format = "%Y/%m/%d, %H:%M:%S",
                   tz = "UTC")
moon

## [1] "1969-07-20 20:17:39 UTC"

format(moon, "The time of the Apollo moon landing was %Y/%m/%d, at %H:%M:%S.")

## [1] "The time of the Apollo moon landing was 1969/07/20, at 20:17:39."

y <- 2020
m <- 12
d <- 31
ISOdate(y, m, d)

## [1] "2020-12-31 12:00:00 GMT"

class(ISOdate(y, m, d))

## [1] "POSIXct" "POSIXt"

as.Date(ISOdate(y, m, d))

## [1] "2020-12-31"

# ISOdate() is vectorised: element i of each vector supplies the year, month
# and day of the i-th result.
# Note: `months` is a plain numeric vector here and shares its name with the
# base function months() that is called earlier in these notes.
years <- c(2025, 2026, 2027, 2028)
months <- c(1, 4, 7, 10)
days <- c(12, 19, 25, 17)
ISOdate(years, months, days)

## [1] "2025-01-12 12:00:00 GMT" "2026-04-19 12:00:00 GMT"
## [3] "2027-07-25 12:00:00 GMT" "2028-10-17 12:00:00 GMT"

jdate <- as.Date("2025-12-31")
jdate

## [1] "2025-12-31"

as.integer(jdate)

## [1] 20453

julian(jdate)

## [1] 20453
## attr(,"origin")
## [1] "1970-01-01"

as.integer(as.Date("1970-01-01"))

## [1] 0

as.integer(as.Date("1969-12-31"))

## [1] -1

as.integer(as.Date("1970-01-02"))

## [1] 1

as.Date(as.integer(jdate), origin = "1970-01-01")

## [1] "2025-12-31"

class(moon) # POSIXct

## [1] "POSIXct" "POSIXt"

moon + 60 * 60 * 2 # 2시간 후, 초단위로

## [1] "1969-07-20 22:17:39 UTC"

moon + 60 * 60 * 24 * 7

## [1] "1969-07-27 20:17:39 UTC"

moon - 60 * 60 * 24 * 7

## [1] "1969-07-13 20:17:39 UTC"

as.Date(moon) + 7

## [1] "1969-07-27"

start <- as.Date("1988-09-17")
end <- as.Date("2018-02-09")
start

## [1] "1988-09-17"

end

## [1] "2018-02-09"

end - start

## Time difference of 10737 days

today <- Sys.Date()
dooly <- as.Date("1983-04-22")
difftime(today, dooly, units = "days")

## Time difference of 13615 days

difftime(today, dooly, units = "weeks")

## Time difference of 1945 weeks

class(moon)

## [1] "POSIXct" "POSIXt"

Sys.time() > moon

## [1] TRUE

Sys.Date() > as.Date(moon)

## [1] TRUE

함수

function(), ls(), rm()

rm(list = ls())
# Convert lengths to metres and append the unit label.
#   x: numeric vector; each value is multiplied by 0.9144 (the yard-to-metre
#      factor) and rounded to one decimal place.
#   Returns a character vector such as "91.4m", handed back with an explicit
#   return() call.
transLength <- function(x) {
  tlength <- round(x * 0.9144, digits = 1)
  result <- paste(tlength, "m", sep = "")
  return(result)
}
ls()

## [1] "transLength"

y <- c(100, 150, 200)
transLength(y)

## [1] "91.4m"  "137.2m" "182.9m"

trans2 <- transLength
trans2

## function(x) {
##   tlength <- round(x * 0.9144, digits = 1)
##   result <- paste(tlength, "m", sep = "")
##   return(result)
## }

trans2(y)

## [1] "91.4m"  "137.2m" "182.9m"

# Variant whose body ends in an assignment instead of return().
# The value of an assignment is returned invisibly, so calling the function at
# the top level prints nothing; wrap the call in print() to see the result.
transLength <- function(x) {
  metres <- round(x * 0.9144, digits = 1)
  labelled <- paste0(metres, "m")
}
transLength(y)
print(transLength(y))

## [1] "91.4m"  "137.2m" "182.9m"

# Variant that relies on the last evaluated expression being the return value:
# no return() call and no final assignment, so the result prints normally.
transLength <- function(x) {
  metres <- round(x * 0.9144, digits = 1)
  paste0(metres, "m")
}
transLength(y)

## [1] "91.4m"  "137.2m" "182.9m"

# Variant with an input check: non-numeric input yields the message
# "Not a Number" instead of attempting the conversion.
transLength <- function(x) {
  if (is.numeric(x)) {
    metres <- round(x * 0.9144, digits = 1)
    paste0(metres, "m")
  } else {
    "Not a Number"
  }
}
transLength("ABC")

## [1] "Not a Number"

# Two equivalent definitions: the braces around a function body are optional
# when the body is a single expression.
f1 <- function(x, y) {x + y}
f2 <- function(x, y) x + y
f1(1, 3)

## [1] 4

f2(1, 3)

## [1] 4

# Same converter written as a brace-less, single-expression function.
transLength <- function(x) paste0(round(x * 0.9144, digits = 1), "m")
transLength(y)

## [1] "91.4m"  "137.2m" "182.9m"

# Generalised converter: the caller supplies both the multiplier and the unit
# label. Neither has a default, so both must be given on every call.
#   x:    numeric vector of lengths
#   mult: conversion factor applied to x
#   unit: text appended to each rounded value
transLength <- function(x, mult, unit) {
  converted <- round(x * mult, digits = 1)
  paste0(converted, unit)
}
transLength(y, mult = 3, unit = "ft")

## [1] "300ft" "450ft" "600ft"

transLength(y, mult = 36, unit = "in")

## [1] "3600in" "5400in" "7200in"

# transLength(y) # ERROR!

# Converter with default arguments: called with x alone it converts yards to
# metres; pass mult and unit (by name or position) for any other conversion.
transLength <- function(x, mult = 0.9144, unit = "m") {
  converted <- round(x * mult, digits = 1)
  paste0(converted, unit)
}
transLength(y)

## [1] "91.4m"  "137.2m" "182.9m"

transLength(y, mult = 3, unit = "ft")

## [1] "300ft" "450ft" "600ft"

transLength(y, 3, "ft")

## [1] "300ft" "450ft" "600ft"

# Converter that forwards any extra arguments (`...`) to round(), e.g.
# digits = 2. With nothing forwarded, round() uses its own default and the
# values are rounded to whole numbers.
transLength <- function(x, mult = 0.9144, unit = "m", ...) {
  scaled <- x * mult
  paste0(round(scaled, ...), unit)
}
transLength(y, digits = 2)

## [1] "91.44m"  "137.16m" "182.88m"

transLength(y)

## [1] "91m"  "137m" "183m"

# Converter with an explicit `digits` parameter (default 1) that controls the
# number of decimal places kept by round().
transLength <- function(x, mult = 0.9144, unit = "m", digits = 1) {
  scaled <- x * mult
  paste0(round(scaled, digits = digits), unit)
}
transLength(y, digits = 2)

## [1] "91.44m"  "137.16m" "182.88m"

transLength(y)

## [1] "91.4m"  "137.2m" "182.9m"

# Converter with a pluggable rounding step.
#   x:    numeric vector of lengths
#   mult: conversion factor applied to x (default: yards to metres)
#   unit: text appended to each value
#   FUN:  function applied to the scaled values, given either as a function
#         (round, floor, signif, ...) or as its name in a string ("floor")
#   ...:  further arguments forwarded to FUN, e.g. digits = 3 for signif()
# Returns a character vector of the processed values with `unit` appended.
transLength <- function(x, mult = 0.9144, unit = "m", FUN = round, ...) {
  # match.fun() returns a function unchanged and looks up a function by name,
  # so both calling styles work, as with the base apply-style functions.
  FUN <- match.fun(FUN)
  tlength <- FUN(x * mult, ...)
  paste0(tlength, unit)
}
transLength(y, FUN = signif, digits = 3)

## [1] "91.4m" "137m"  "183m"

transLength(y, FUN = floor)

## [1] "91m"  "137m" "182m"

transLength(y)

## [1] "91m"  "137m" "183m"

x <- 11:15
# Demonstrates name lookup after a local binding has been removed.
#   x: any printable vector. It is shown once, then the argument binding is
#      deleted from the function's own environment.
# The second cat() can no longer find a local `x`, so the lookup continues in
# the enclosing environment and prints whatever `x` is defined there.
scopetest <- function(x) {
  cat("This is x: ", x, "\n")
  rm(x)  # removes only the local (argument) x
  cat("This is x after removing x", x, "\n")
}
scopetest(x = 15:11)

## This is x:  15 14 13 12 11 
## This is x after removing x 11 12 13 14 15

논리흐름 제어

if(), ifelse()

x <- pi
y <- 3
if (x > y) x

## [1] 3.141593

if (x < y) x

if (x < y) x else y

## [1] 3

x <- pi
y <- 1:5
# `x < y` has five elements, but `if` expects a single TRUE/FALSE. The R
# version that produced these notes warns and tests only the first element;
# from R 4.2.0 onward a condition longer than one is an error. For an
# element-wise choice use ifelse() instead.
if (x < y) x else y

## Warning in if (x < y) x else y: length > 1 이라는 조건이 있고, 첫번째 요소만이
## 사용될 것입니다

## [1] 1 2 3 4 5

if (x > y) x else y

## Warning in if (x > y) x else y: length > 1 이라는 조건이 있고, 첫번째 요소만이
## 사용될 것입니다

## [1] 3.141593

test <- c(TRUE, FALSE, TRUE, TRUE, FALSE)
yes <- 1:5
no <- 0
ifelse(test, yes, no)

## [1] 1 0 3 4 0

ifelse(x > y, x, y)

## [1] 3.141593 3.141593 3.141593 4.000000 5.000000

switch()

# Measure of central tendency selected by name.
#   x:    numeric vector
#   type: "mean", "median" or "trimmed" (mean with 10% trimmed from each end)
# Any other name yields a hint listing the accepted choices.
center <- function(x, type) {
  hint <- "Choose one of mean, median, and trimmed"
  switch(
    type,
    mean = mean(x),
    median = median(x),
    trimmed = mean(x, trim = 0.1),
    hint
  )
}
x <- c(2, 3, 5, 7, 11, 13, 17, 19, 23, 29)
center(x, "mean")

## [1] 12.9

center(x, "median")

## [1] 12

center(x, "trimmed")

## [1] 12.25

center(x, "other")

## [1] "Choose one of mean, median, and trimmed"

repeat(), while(), for()

# repeat print("hello") # 무한 반복, 멈추려면 ESC

# repeat has no condition of its own: the loop ends only when break is
# reached, here as soon as i exceeds 25.
i <- 5
repeat {
  if (i > 25) {
    break
  }
  print(i)
  i <- i + 5
}

## [1] 5
## [1] 10
## [1] 15
## [1] 20
## [1] 25

i <- 5
while(i <= 25) {
  print(i)
  i <- i + 5
}

## [1] 5
## [1] 10
## [1] 15
## [1] 20
## [1] 25

for (i in seq(from = 5, to = 25, by = 5)) print(i)

## [1] 5
## [1] 10
## [1] 15
## [1] 20
## [1] 25

for (i in seq(from = 5, to = 25, by = 5)) i

i <- 1
for (i in seq(from = 5, to = 25, by = 5)) i
i

## [1] 25

서브셋

subset()

str(mtcars)

## 'data.frame':    32 obs. of  11 variables:
##  $ mpg : num  21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
##  $ cyl : num  6 6 4 6 8 6 8 4 4 6 ...
##  $ disp: num  160 160 108 258 360 ...
##  $ hp  : num  110 110 93 110 175 105 245 62 95 123 ...
##  $ drat: num  3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
##  $ wt  : num  2.62 2.88 2.32 3.21 3.44 ...
##  $ qsec: num  16.5 17 18.6 19.4 17 ...
##  $ vs  : num  0 0 1 1 0 1 0 1 1 1 ...
##  $ am  : num  1 1 1 0 0 0 0 0 0 0 ...
##  $ gear: num  4 4 4 3 3 3 3 4 4 4 ...
##  $ carb: num  4 4 1 1 2 1 4 2 2 4 ...

mtcars$mpg

##  [1] 21.0 21.0 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2 10.4
## [16] 10.4 14.7 32.4 30.4 33.9 21.5 15.5 15.2 13.3 19.2 27.3 26.0 30.4 15.8 19.7
## [31] 15.0 21.4

mtcars[["mpg"]]

##  [1] 21.0 21.0 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2 10.4
## [16] 10.4 14.7 32.4 30.4 33.9 21.5 15.5 15.2 13.3 19.2 27.3 26.0 30.4 15.8 19.7
## [31] 15.0 21.4

mtcars[[1]]

##  [1] 21.0 21.0 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2 10.4
## [16] 10.4 14.7 32.4 30.4 33.9 21.5 15.5 15.2 13.3 19.2 27.3 26.0 30.4 15.8 19.7
## [31] 15.0 21.4

mtcars[c(1, 4)]

##                      mpg  hp
## Mazda RX4           21.0 110
## Mazda RX4 Wag       21.0 110
## Datsun 710          22.8  93
## Hornet 4 Drive      21.4 110
## Hornet Sportabout   18.7 175
## Valiant             18.1 105
## Duster 360          14.3 245
## Merc 240D           24.4  62
## Merc 230            22.8  95
## Merc 280            19.2 123
## Merc 280C           17.8 123
## Merc 450SE          16.4 180
## Merc 450SL          17.3 180
## Merc 450SLC         15.2 180
## Cadillac Fleetwood  10.4 205
## Lincoln Continental 10.4 215
## Chrysler Imperial   14.7 230
## Fiat 128            32.4  66
## Honda Civic         30.4  52
## Toyota Corolla      33.9  65
## Toyota Corona       21.5  97
## Dodge Challenger    15.5 150
## AMC Javelin         15.2 150
## Camaro Z28          13.3 245
## Pontiac Firebird    19.2 175
## Fiat X1-9           27.3  66
## Porsche 914-2       26.0  91
## Lotus Europa        30.4 113
## Ford Pantera L      15.8 264
## Ferrari Dino        19.7 175
## Maserati Bora       15.0 335
## Volvo 142E          21.4 109

mtcars[c("mpg", "hp")]

##                      mpg  hp
## Mazda RX4           21.0 110
## Mazda RX4 Wag       21.0 110
## Datsun 710          22.8  93
## Hornet 4 Drive      21.4 110
## Hornet Sportabout   18.7 175
## Valiant             18.1 105
## Duster 360          14.3 245
## Merc 240D           24.4  62
## Merc 230            22.8  95
## Merc 280            19.2 123
## Merc 280C           17.8 123
## Merc 450SE          16.4 180
## Merc 450SL          17.3 180
## Merc 450SLC         15.2 180
## Cadillac Fleetwood  10.4 205
## Lincoln Continental 10.4 215
## Chrysler Imperial   14.7 230
## Fiat 128            32.4  66
## Honda Civic         30.4  52
## Toyota Corolla      33.9  65
## Toyota Corona       21.5  97
## Dodge Challenger    15.5 150
## AMC Javelin         15.2 150
## Camaro Z28          13.3 245
## Pontiac Firebird    19.2 175
## Fiat X1-9           27.3  66
## Porsche 914-2       26.0  91
## Lotus Europa        30.4 113
## Ford Pantera L      15.8 264
## Ferrari Dino        19.7 175
## Maserati Bora       15.0 335
## Volvo 142E          21.4 109

mtcars[-c(2, 3, 5, 7:11)]

##                      mpg  hp    wt
## Mazda RX4           21.0 110 2.620
## Mazda RX4 Wag       21.0 110 2.875
## Datsun 710          22.8  93 2.320
## Hornet 4 Drive      21.4 110 3.215
## Hornet Sportabout   18.7 175 3.440
## Valiant             18.1 105 3.460
## Duster 360          14.3 245 3.570
## Merc 240D           24.4  62 3.190
## Merc 230            22.8  95 3.150
## Merc 280            19.2 123 3.440
## Merc 280C           17.8 123 3.440
## Merc 450SE          16.4 180 4.070
## Merc 450SL          17.3 180 3.730
## Merc 450SLC         15.2 180 3.780
## Cadillac Fleetwood  10.4 205 5.250
## Lincoln Continental 10.4 215 5.424
## Chrysler Imperial   14.7 230 5.345
## Fiat 128            32.4  66 2.200
## Honda Civic         30.4  52 1.615
## Toyota Corolla      33.9  65 1.835
## Toyota Corona       21.5  97 2.465
## Dodge Challenger    15.5 150 3.520
## AMC Javelin         15.2 150 3.435
## Camaro Z28          13.3 245 3.840
## Pontiac Firebird    19.2 175 3.845
## Fiat X1-9           27.3  66 1.935
## Porsche 914-2       26.0  91 2.140
## Lotus Europa        30.4 113 1.513
## Ford Pantera L      15.8 264 3.170
## Ferrari Dino        19.7 175 2.770
## Maserati Bora       15.0 335 3.570
## Volvo 142E          21.4 109 2.780

mtcars[-1]

##                     cyl  disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4             6 160.0 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag         6 160.0 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710            4 108.0  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive        6 258.0 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout     8 360.0 175 3.15 3.440 17.02  0  0    3    2
## Valiant               6 225.0 105 2.76 3.460 20.22  1  0    3    1
## Duster 360            8 360.0 245 3.21 3.570 15.84  0  0    3    4
## Merc 240D             4 146.7  62 3.69 3.190 20.00  1  0    4    2
## Merc 230              4 140.8  95 3.92 3.150 22.90  1  0    4    2
## Merc 280              6 167.6 123 3.92 3.440 18.30  1  0    4    4
## Merc 280C             6 167.6 123 3.92 3.440 18.90  1  0    4    4
## Merc 450SE            8 275.8 180 3.07 4.070 17.40  0  0    3    3
## Merc 450SL            8 275.8 180 3.07 3.730 17.60  0  0    3    3
## Merc 450SLC           8 275.8 180 3.07 3.780 18.00  0  0    3    3
## Cadillac Fleetwood    8 472.0 205 2.93 5.250 17.98  0  0    3    4
## Lincoln Continental   8 460.0 215 3.00 5.424 17.82  0  0    3    4
## Chrysler Imperial     8 440.0 230 3.23 5.345 17.42  0  0    3    4
## Fiat 128              4  78.7  66 4.08 2.200 19.47  1  1    4    1
## Honda Civic           4  75.7  52 4.93 1.615 18.52  1  1    4    2
## Toyota Corolla        4  71.1  65 4.22 1.835 19.90  1  1    4    1
## Toyota Corona         4 120.1  97 3.70 2.465 20.01  1  0    3    1
## Dodge Challenger      8 318.0 150 2.76 3.520 16.87  0  0    3    2
## AMC Javelin           8 304.0 150 3.15 3.435 17.30  0  0    3    2
## Camaro Z28            8 350.0 245 3.73 3.840 15.41  0  0    3    4
## Pontiac Firebird      8 400.0 175 3.08 3.845 17.05  0  0    3    2
## Fiat X1-9             4  79.0  66 4.08 1.935 18.90  1  1    4    1
## Porsche 914-2         4 120.3  91 4.43 2.140 16.70  0  1    5    2
## Lotus Europa          4  95.1 113 3.77 1.513 16.90  1  1    5    2
## Ford Pantera L        8 351.0 264 4.22 3.170 14.50  0  1    5    4
## Ferrari Dino          6 145.0 175 3.62 2.770 15.50  0  1    5    6
## Maserati Bora         8 301.0 335 3.54 3.570 14.60  0  1    5    8
## Volvo 142E            4 121.0 109 4.11 2.780 18.60  1  1    4    2

mtcars[1] <- NULL
mtcars

##                     cyl  disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4             6 160.0 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag         6 160.0 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710            4 108.0  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive        6 258.0 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout     8 360.0 175 3.15 3.440 17.02  0  0    3    2
## Valiant               6 225.0 105 2.76 3.460 20.22  1  0    3    1
## Duster 360            8 360.0 245 3.21 3.570 15.84  0  0    3    4
## Merc 240D             4 146.7  62 3.69 3.190 20.00  1  0    4    2
## Merc 230              4 140.8  95 3.92 3.150 22.90  1  0    4    2
## Merc 280              6 167.6 123 3.92 3.440 18.30  1  0    4    4
## Merc 280C             6 167.6 123 3.92 3.440 18.90  1  0    4    4
## Merc 450SE            8 275.8 180 3.07 4.070 17.40  0  0    3    3
## Merc 450SL            8 275.8 180 3.07 3.730 17.60  0  0    3    3
## Merc 450SLC           8 275.8 180 3.07 3.780 18.00  0  0    3    3
## Cadillac Fleetwood    8 472.0 205 2.93 5.250 17.98  0  0    3    4
## Lincoln Continental   8 460.0 215 3.00 5.424 17.82  0  0    3    4
## Chrysler Imperial     8 440.0 230 3.23 5.345 17.42  0  0    3    4
## Fiat 128              4  78.7  66 4.08 2.200 19.47  1  1    4    1
## Honda Civic           4  75.7  52 4.93 1.615 18.52  1  1    4    2
## Toyota Corolla        4  71.1  65 4.22 1.835 19.90  1  1    4    1
## Toyota Corona         4 120.1  97 3.70 2.465 20.01  1  0    3    1
## Dodge Challenger      8 318.0 150 2.76 3.520 16.87  0  0    3    2
## AMC Javelin           8 304.0 150 3.15 3.435 17.30  0  0    3    2
## Camaro Z28            8 350.0 245 3.73 3.840 15.41  0  0    3    4
## Pontiac Firebird      8 400.0 175 3.08 3.845 17.05  0  0    3    2
## Fiat X1-9             4  79.0  66 4.08 1.935 18.90  1  1    4    1
## Porsche 914-2         4 120.3  91 4.43 2.140 16.70  0  1    5    2
## Lotus Europa          4  95.1 113 3.77 1.513 16.90  1  1    5    2
## Ford Pantera L        8 351.0 264 4.22 3.170 14.50  0  1    5    4
## Ferrari Dino          6 145.0 175 3.62 2.770 15.50  0  1    5    6
## Maserati Bora         8 301.0 335 3.54 3.570 14.60  0  1    5    8
## Volvo 142E            4 121.0 109 4.11 2.780 18.60  1  1    4    2

# mtcars[c(-1, 2)] # ERROR

str(iris)

## 'data.frame':    150 obs. of  5 variables:
##  $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##  $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##  $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##  $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...

iris[1:5, ]

##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa

iris[, c("Sepal.Length", "Sepal.Width")]

##     Sepal.Length Sepal.Width
## 1            5.1         3.5
## 2            4.9         3.0
## 3            4.7         3.2
## 4            4.6         3.1
## 5            5.0         3.6
## 6            5.4         3.9
## 7            4.6         3.4
## 8            5.0         3.4
## 9            4.4         2.9
## 10           4.9         3.1
## 11           5.4         3.7
## 12           4.8         3.4
## 13           4.8         3.0
## 14           4.3         3.0
## 15           5.8         4.0
## 16           5.7         4.4
## 17           5.4         3.9
## 18           5.1         3.5
## 19           5.7         3.8
## 20           5.1         3.8
## 21           5.4         3.4
## 22           5.1         3.7
## 23           4.6         3.6
## 24           5.1         3.3
## 25           4.8         3.4
## 26           5.0         3.0
## 27           5.0         3.4
## 28           5.2         3.5
## 29           5.2         3.4
## 30           4.7         3.2
## 31           4.8         3.1
## 32           5.4         3.4
## 33           5.2         4.1
## 34           5.5         4.2
## 35           4.9         3.1
## 36           5.0         3.2
## 37           5.5         3.5
## 38           4.9         3.6
## 39           4.4         3.0
## 40           5.1         3.4
## 41           5.0         3.5
## 42           4.5         2.3
## 43           4.4         3.2
## 44           5.0         3.5
## 45           5.1         3.8
## 46           4.8         3.0
## 47           5.1         3.8
## 48           4.6         3.2
## 49           5.3         3.7
## 50           5.0         3.3
## 51           7.0         3.2
## 52           6.4         3.2
## 53           6.9         3.1
## 54           5.5         2.3
## 55           6.5         2.8
## 56           5.7         2.8
## 57           6.3         3.3
## 58           4.9         2.4
## 59           6.6         2.9
## 60           5.2         2.7
## 61           5.0         2.0
## 62           5.9         3.0
## 63           6.0         2.2
## 64           6.1         2.9
## 65           5.6         2.9
## 66           6.7         3.1
## 67           5.6         3.0
## 68           5.8         2.7
## 69           6.2         2.2
## 70           5.6         2.5
## 71           5.9         3.2
## 72           6.1         2.8
## 73           6.3         2.5
## 74           6.1         2.8
## 75           6.4         2.9
## 76           6.6         3.0
## 77           6.8         2.8
## 78           6.7         3.0
## 79           6.0         2.9
## 80           5.7         2.6
## 81           5.5         2.4
## 82           5.5         2.4
## 83           5.8         2.7
## 84           6.0         2.7
## 85           5.4         3.0
## 86           6.0         3.4
## 87           6.7         3.1
## 88           6.3         2.3
## 89           5.6         3.0
## 90           5.5         2.5
## 91           5.5         2.6
## 92           6.1         3.0
## 93           5.8         2.6
## 94           5.0         2.3
## 95           5.6         2.7
## 96           5.7         3.0
## 97           5.7         2.9
## 98           6.2         2.9
## 99           5.1         2.5
## 100          5.7         2.8
## 101          6.3         3.3
## 102          5.8         2.7
## 103          7.1         3.0
## 104          6.3         2.9
## 105          6.5         3.0
## 106          7.6         3.0
## 107          4.9         2.5
## 108          7.3         2.9
## 109          6.7         2.5
## 110          7.2         3.6
## 111          6.5         3.2
## 112          6.4         2.7
## 113          6.8         3.0
## 114          5.7         2.5
## 115          5.8         2.8
## 116          6.4         3.2
## 117          6.5         3.0
## 118          7.7         3.8
## 119          7.7         2.6
## 120          6.0         2.2
## 121          6.9         3.2
## 122          5.6         2.8
## 123          7.7         2.8
## 124          6.3         2.7
## 125          6.7         3.3
## 126          7.2         3.2
## 127          6.2         2.8
## 128          6.1         3.0
## 129          6.4         2.8
## 130          7.2         3.0
## 131          7.4         2.8
## 132          7.9         3.8
## 133          6.4         2.8
## 134          6.3         2.8
## 135          6.1         2.6
## 136          7.7         3.0
## 137          6.3         3.4
## 138          6.4         3.1
## 139          6.0         3.0
## 140          6.9         3.1
## 141          6.7         3.1
## 142          6.9         3.1
## 143          5.8         2.7
## 144          6.8         3.2
## 145          6.7         3.3
## 146          6.7         3.0
## 147          6.3         2.5
## 148          6.5         3.0
## 149          6.2         3.4
## 150          5.9         3.0

iris[, "Sepal.Length"]

##   [1] 5.1 4.9 4.7 4.6 5.0 5.4 4.6 5.0 4.4 4.9 5.4 4.8 4.8 4.3 5.8 5.7 5.4 5.1
##  [19] 5.7 5.1 5.4 5.1 4.6 5.1 4.8 5.0 5.0 5.2 5.2 4.7 4.8 5.4 5.2 5.5 4.9 5.0
##  [37] 5.5 4.9 4.4 5.1 5.0 4.5 4.4 5.0 5.1 4.8 5.1 4.6 5.3 5.0 7.0 6.4 6.9 5.5
##  [55] 6.5 5.7 6.3 4.9 6.6 5.2 5.0 5.9 6.0 6.1 5.6 6.7 5.6 5.8 6.2 5.6 5.9 6.1
##  [73] 6.3 6.1 6.4 6.6 6.8 6.7 6.0 5.7 5.5 5.5 5.8 6.0 5.4 6.0 6.7 6.3 5.6 5.5
##  [91] 5.5 6.1 5.8 5.0 5.6 5.7 5.7 6.2 5.1 5.7 6.3 5.8 7.1 6.3 6.5 7.6 4.9 7.3
## [109] 6.7 7.2 6.5 6.4 6.8 5.7 5.8 6.4 6.5 7.7 7.7 6.0 6.9 5.6 7.7 6.3 6.7 7.2
## [127] 6.2 6.1 6.4 7.2 7.4 7.9 6.4 6.3 6.1 7.7 6.3 6.4 6.0 6.9 6.7 6.9 5.8 6.8
## [145] 6.7 6.7 6.3 6.5 6.2 5.9

iris[, "Sepal.Length", drop = FALSE]

##     Sepal.Length
## 1            5.1
## 2            4.9
## 3            4.7
## 4            4.6
## 5            5.0
## 6            5.4
## 7            4.6
## 8            5.0
## 9            4.4
## 10           4.9
## 11           5.4
## 12           4.8
## 13           4.8
## 14           4.3
## 15           5.8
## 16           5.7
## 17           5.4
## 18           5.1
## 19           5.7
## 20           5.1
## 21           5.4
## 22           5.1
## 23           4.6
## 24           5.1
## 25           4.8
## 26           5.0
## 27           5.0
## 28           5.2
## 29           5.2
## 30           4.7
## 31           4.8
## 32           5.4
## 33           5.2
## 34           5.5
## 35           4.9
## 36           5.0
## 37           5.5
## 38           4.9
## 39           4.4
## 40           5.1
## 41           5.0
## 42           4.5
## 43           4.4
## 44           5.0
## 45           5.1
## 46           4.8
## 47           5.1
## 48           4.6
## 49           5.3
## 50           5.0
## 51           7.0
## 52           6.4
## 53           6.9
## 54           5.5
## 55           6.5
## 56           5.7
## 57           6.3
## 58           4.9
## 59           6.6
## 60           5.2
## 61           5.0
## 62           5.9
## 63           6.0
## 64           6.1
## 65           5.6
## 66           6.7
## 67           5.6
## 68           5.8
## 69           6.2
## 70           5.6
## 71           5.9
## 72           6.1
## 73           6.3
## 74           6.1
## 75           6.4
## 76           6.6
## 77           6.8
## 78           6.7
## 79           6.0
## 80           5.7
## 81           5.5
## 82           5.5
## 83           5.8
## 84           6.0
## 85           5.4
## 86           6.0
## 87           6.7
## 88           6.3
## 89           5.6
## 90           5.5
## 91           5.5
## 92           6.1
## 93           5.8
## 94           5.0
## 95           5.6
## 96           5.7
## 97           5.7
## 98           6.2
## 99           5.1
## 100          5.7
## 101          6.3
## 102          5.8
## 103          7.1
## 104          6.3
## 105          6.5
## 106          7.6
## 107          4.9
## 108          7.3
## 109          6.7
## 110          7.2
## 111          6.5
## 112          6.4
## 113          6.8
## 114          5.7
## 115          5.8
## 116          6.4
## 117          6.5
## 118          7.7
## 119          7.7
## 120          6.0
## 121          6.9
## 122          5.6
## 123          7.7
## 124          6.3
## 125          6.7
## 126          7.2
## 127          6.2
## 128          6.1
## 129          6.4
## 130          7.2
## 131          7.4
## 132          7.9
## 133          6.4
## 134          6.3
## 135          6.1
## 136          7.7
## 137          6.3
## 138          6.4
## 139          6.0
## 140          6.9
## 141          6.7
## 142          6.9
## 143          5.8
## 144          6.8
## 145          6.7
## 146          6.7
## 147          6.3
## 148          6.5
## 149          6.2
## 150          5.9

iris["Sepal.Length"]

##     Sepal.Length
## 1            5.1
## 2            4.9
## 3            4.7
## 4            4.6
## 5            5.0
## 6            5.4
## 7            4.6
## 8            5.0
## 9            4.4
## 10           4.9
## 11           5.4
## 12           4.8
## 13           4.8
## 14           4.3
## 15           5.8
## 16           5.7
## 17           5.4
## 18           5.1
## 19           5.7
## 20           5.1
## 21           5.4
## 22           5.1
## 23           4.6
## 24           5.1
## 25           4.8
## 26           5.0
## 27           5.0
## 28           5.2
## 29           5.2
## 30           4.7
## 31           4.8
## 32           5.4
## 33           5.2
## 34           5.5
## 35           4.9
## 36           5.0
## 37           5.5
## 38           4.9
## 39           4.4
## 40           5.1
## 41           5.0
## 42           4.5
## 43           4.4
## 44           5.0
## 45           5.1
## 46           4.8
## 47           5.1
## 48           4.6
## 49           5.3
## 50           5.0
## 51           7.0
## 52           6.4
## 53           6.9
## 54           5.5
## 55           6.5
## 56           5.7
## 57           6.3
## 58           4.9
## 59           6.6
## 60           5.2
## 61           5.0
## 62           5.9
## 63           6.0
## 64           6.1
## 65           5.6
## 66           6.7
## 67           5.6
## 68           5.8
## 69           6.2
## 70           5.6
## 71           5.9
## 72           6.1
## 73           6.3
## 74           6.1
## 75           6.4
## 76           6.6
## 77           6.8
## 78           6.7
## 79           6.0
## 80           5.7
## 81           5.5
## 82           5.5
## 83           5.8
## 84           6.0
## 85           5.4
## 86           6.0
## 87           6.7
## 88           6.3
## 89           5.6
## 90           5.5
## 91           5.5
## 92           6.1
## 93           5.8
## 94           5.0
## 95           5.6
## 96           5.7
## 97           5.7
## 98           6.2
## 99           5.1
## 100          5.7
## 101          6.3
## 102          5.8
## 103          7.1
## 104          6.3
## 105          6.5
## 106          7.6
## 107          4.9
## 108          7.3
## 109          6.7
## 110          7.2
## 111          6.5
## 112          6.4
## 113          6.8
## 114          5.7
## 115          5.8
## 116          6.4
## 117          6.5
## 118          7.7
## 119          7.7
## 120          6.0
## 121          6.9
## 122          5.6
## 123          7.7
## 124          6.3
## 125          6.7
## 126          7.2
## 127          6.2
## 128          6.1
## 129          6.4
## 130          7.2
## 131          7.4
## 132          7.9
## 133          6.4
## 134          6.3
## 135          6.1
## 136          7.7
## 137          6.3
## 138          6.4
## 139          6.0
## 140          6.9
## 141          6.7
## 142          6.9
## 143          5.8
## 144          6.8
## 145          6.7
## 146          6.7
## 147          6.3
## 148          6.5
## 149          6.2
## 150          5.9

iris[1:5, c("Sepal.Length", "Sepal.Width")]

##   Sepal.Length Sepal.Width
## 1          5.1         3.5
## 2          4.9         3.0
## 3          4.7         3.2
## 4          4.6         3.1
## 5          5.0         3.6

iris[iris$Sepal.Length > 7,]

##     Sepal.Length Sepal.Width Petal.Length Petal.Width   Species
## 103          7.1         3.0          5.9         2.1 virginica
## 106          7.6         3.0          6.6         2.1 virginica
## 108          7.3         2.9          6.3         1.8 virginica
## 110          7.2         3.6          6.1         2.5 virginica
## 118          7.7         3.8          6.7         2.2 virginica
## 119          7.7         2.6          6.9         2.3 virginica
## 123          7.7         2.8          6.7         2.0 virginica
## 126          7.2         3.2          6.0         1.8 virginica
## 130          7.2         3.0          5.8         1.6 virginica
## 131          7.4         2.8          6.1         1.9 virginica
## 132          7.9         3.8          6.4         2.0 virginica
## 136          7.7         3.0          6.1         2.3 virginica

iris[iris$Sepal.Length > 7, c("Sepal.Length", "Sepal.Width", "Species")]

##     Sepal.Length Sepal.Width   Species
## 103          7.1         3.0 virginica
## 106          7.6         3.0 virginica
## 108          7.3         2.9 virginica
## 110          7.2         3.6 virginica
## 118          7.7         3.8 virginica
## 119          7.7         2.6 virginica
## 123          7.7         2.8 virginica
## 126          7.2         3.2 virginica
## 130          7.2         3.0 virginica
## 131          7.4         2.8 virginica
## 132          7.9         3.8 virginica
## 136          7.7         3.0 virginica

subset(iris,
       subset = (Sepal.Length > 7),
       select = c("Sepal.Length", "Sepal.Width", "Species"))

##     Sepal.Length Sepal.Width   Species
## 103          7.1         3.0 virginica
## 106          7.6         3.0 virginica
## 108          7.3         2.9 virginica
## 110          7.2         3.6 virginica
## 118          7.7         3.8 virginica
## 119          7.7         2.6 virginica
## 123          7.7         2.8 virginica
## 126          7.2         3.2 virginica
## 130          7.2         3.0 virginica
## 131          7.4         2.8 virginica
## 132          7.9         3.8 virginica
## 136          7.7         3.0 virginica

sample(), set.seed()

sample(x = 1:10, size = 5)

## [1] 9 4 3 1 6

sample(x = 10, size = 5)

## [1]  9  1  7 10  6

sample(x = 10, size = 5, replace = TRUE)

## [1] 1 4 5 9 7

sample(10)

##  [1]  4  5  8 10  1  2  3  9  6  7

set.seed(1)
sample(x = 10, size = 5, replace = TRUE)

## [1] 9 4 7 1 2

sample(x = 10, size = 5, replace = TRUE)

## [1] 7 2 3 1 5

set.seed(1)
sample(x = 10, size = 5, replace = TRUE)

## [1] 9 4 7 1 2

sample(iris, 3)

##        Species Petal.Length Petal.Width
## 1       setosa          1.4         0.2
## 2       setosa          1.4         0.2
## 3       setosa          1.3         0.2
## 4       setosa          1.5         0.2
## 5       setosa          1.4         0.2
## 6       setosa          1.7         0.4
## 7       setosa          1.4         0.3
## 8       setosa          1.5         0.2
## 9       setosa          1.4         0.2
## 10      setosa          1.5         0.1
## 11      setosa          1.5         0.2
## 12      setosa          1.6         0.2
## 13      setosa          1.4         0.1
## 14      setosa          1.1         0.1
## 15      setosa          1.2         0.2
## 16      setosa          1.5         0.4
## 17      setosa          1.3         0.4
## 18      setosa          1.4         0.3
## 19      setosa          1.7         0.3
## 20      setosa          1.5         0.3
## 21      setosa          1.7         0.2
## 22      setosa          1.5         0.4
## 23      setosa          1.0         0.2
## 24      setosa          1.7         0.5
## 25      setosa          1.9         0.2
## 26      setosa          1.6         0.2
## 27      setosa          1.6         0.4
## 28      setosa          1.5         0.2
## 29      setosa          1.4         0.2
## 30      setosa          1.6         0.2
## 31      setosa          1.6         0.2
## 32      setosa          1.5         0.4
## 33      setosa          1.5         0.1
## 34      setosa          1.4         0.2
## 35      setosa          1.5         0.2
## 36      setosa          1.2         0.2
## 37      setosa          1.3         0.2
## 38      setosa          1.4         0.1
## 39      setosa          1.3         0.2
## 40      setosa          1.5         0.2
## 41      setosa          1.3         0.3
## 42      setosa          1.3         0.3
## 43      setosa          1.3         0.2
## 44      setosa          1.6         0.6
## 45      setosa          1.9         0.4
## 46      setosa          1.4         0.3
## 47      setosa          1.6         0.2
## 48      setosa          1.4         0.2
## 49      setosa          1.5         0.2
## 50      setosa          1.4         0.2
## 51  versicolor          4.7         1.4
## 52  versicolor          4.5         1.5
## 53  versicolor          4.9         1.5
## 54  versicolor          4.0         1.3
## 55  versicolor          4.6         1.5
## 56  versicolor          4.5         1.3
## 57  versicolor          4.7         1.6
## 58  versicolor          3.3         1.0
## 59  versicolor          4.6         1.3
## 60  versicolor          3.9         1.4
## 61  versicolor          3.5         1.0
## 62  versicolor          4.2         1.5
## 63  versicolor          4.0         1.0
## 64  versicolor          4.7         1.4
## 65  versicolor          3.6         1.3
## 66  versicolor          4.4         1.4
## 67  versicolor          4.5         1.5
## 68  versicolor          4.1         1.0
## 69  versicolor          4.5         1.5
## 70  versicolor          3.9         1.1
## 71  versicolor          4.8         1.8
## 72  versicolor          4.0         1.3
## 73  versicolor          4.9         1.5
## 74  versicolor          4.7         1.2
## 75  versicolor          4.3         1.3
## 76  versicolor          4.4         1.4
## 77  versicolor          4.8         1.4
## 78  versicolor          5.0         1.7
## 79  versicolor          4.5         1.5
## 80  versicolor          3.5         1.0
## 81  versicolor          3.8         1.1
## 82  versicolor          3.7         1.0
## 83  versicolor          3.9         1.2
## 84  versicolor          5.1         1.6
## 85  versicolor          4.5         1.5
## 86  versicolor          4.5         1.6
## 87  versicolor          4.7         1.5
## 88  versicolor          4.4         1.3
## 89  versicolor          4.1         1.3
## 90  versicolor          4.0         1.3
## 91  versicolor          4.4         1.2
## 92  versicolor          4.6         1.4
## 93  versicolor          4.0         1.2
## 94  versicolor          3.3         1.0
## 95  versicolor          4.2         1.3
## 96  versicolor          4.2         1.2
## 97  versicolor          4.2         1.3
## 98  versicolor          4.3         1.3
## 99  versicolor          3.0         1.1
## 100 versicolor          4.1         1.3
## 101  virginica          6.0         2.5
## 102  virginica          5.1         1.9
## 103  virginica          5.9         2.1
## 104  virginica          5.6         1.8
## 105  virginica          5.8         2.2
## 106  virginica          6.6         2.1
## 107  virginica          4.5         1.7
## 108  virginica          6.3         1.8
## 109  virginica          5.8         1.8
## 110  virginica          6.1         2.5
## 111  virginica          5.1         2.0
## 112  virginica          5.3         1.9
## 113  virginica          5.5         2.1
## 114  virginica          5.0         2.0
## 115  virginica          5.1         2.4
## 116  virginica          5.3         2.3
## 117  virginica          5.5         1.8
## 118  virginica          6.7         2.2
## 119  virginica          6.9         2.3
## 120  virginica          5.0         1.5
## 121  virginica          5.7         2.3
## 122  virginica          4.9         2.0
## 123  virginica          6.7         2.0
## 124  virginica          4.9         1.8
## 125  virginica          5.7         2.1
## 126  virginica          6.0         1.8
## 127  virginica          4.8         1.8
## 128  virginica          4.9         1.8
## 129  virginica          5.6         2.1
## 130  virginica          5.8         1.6
## 131  virginica          6.1         1.9
## 132  virginica          6.4         2.0
## 133  virginica          5.6         2.2
## 134  virginica          5.1         1.5
## 135  virginica          5.6         1.4
## 136  virginica          6.1         2.3
## 137  virginica          5.6         2.4
## 138  virginica          5.5         1.8
## 139  virginica          4.8         1.8
## 140  virginica          5.4         2.1
## 141  virginica          5.6         2.4
## 142  virginica          5.1         2.3
## 143  virginica          5.1         1.9
## 144  virginica          5.9         2.3
## 145  virginica          5.7         2.5
## 146  virginica          5.2         2.3
## 147  virginica          5.0         1.9
## 148  virginica          5.2         2.0
## 149  virginica          5.4         2.3
## 150  virginica          5.1         1.8

set.seed(1)
index <- sample(nrow(iris), 3)
index

## [1]  68 129  43

iris[index,]

##     Sepal.Length Sepal.Width Petal.Length Petal.Width    Species
## 68           5.8         2.7          4.1         1.0 versicolor
## 129          6.4         2.8          5.6         2.1  virginica
## 43           4.4         3.2          1.3         0.2     setosa

duplicated(), which(), unique()

duplicated(c(1, 2, 3, 1, 1, 4, 3))

## [1] FALSE FALSE FALSE  TRUE  TRUE FALSE  TRUE

id <- c("A001", "A002", "A003")
name <- c("Mouse", "Keyboard", "USB")
price <- c(30000, 90000, 50000)
product <- data.frame(id = id, name = name, price = price)
product

##     id     name price
## 1 A001    Mouse 30000
## 2 A002 Keyboard 90000
## 3 A003      USB 50000

# Append the duplicate as a one-row data frame: rbind() with a bare c() vector
# would coerce 30000 to "30000" and turn the whole price column into character.
product <- rbind(product,
                 data.frame(id = "A001", name = "Mouse", price = 30000))
product

##     id     name price
## 1 A001    Mouse 30000
## 2 A002 Keyboard 90000
## 3 A003      USB 50000
## 4 A001    Mouse 30000

duplicated(product)

## [1] FALSE FALSE FALSE  TRUE

product[!duplicated(product), ]

##     id     name price
## 1 A001    Mouse 30000
## 2 A002 Keyboard 90000
## 3 A003      USB 50000

which(duplicated(product))

## [1] 4

index <- which(duplicated(product))
# Guard the empty case: with no duplicates, product[-integer(0), ] would
# select zero rows instead of keeping them all.
if (length(index) > 0) product[-index, ] else product

##     id     name price
## 1 A001    Mouse 30000
## 2 A002 Keyboard 90000
## 3 A003      USB 50000

unique(product)

##     id     name price
## 1 A001    Mouse 30000
## 2 A002 Keyboard 90000
## 3 A003      USB 50000

complete.cases(), na.omit(), cut()

str(airquality)

## 'data.frame':    153 obs. of  6 variables:
##  $ Ozone  : int  41 36 12 18 NA 28 23 19 8 NA ...
##  $ Solar.R: int  190 118 149 313 NA NA 299 99 19 194 ...
##  $ Wind   : num  7.4 8 12.6 11.5 14.3 14.9 8.6 13.8 20.1 8.6 ...
##  $ Temp   : int  67 72 74 62 56 66 65 59 61 69 ...
##  $ Month  : int  5 5 5 5 5 5 5 5 5 5 ...
##  $ Day    : int  1 2 3 4 5 6 7 8 9 10 ...

complete.cases(airquality)

##   [1]  TRUE  TRUE  TRUE  TRUE FALSE FALSE  TRUE  TRUE  TRUE FALSE FALSE  TRUE
##  [13]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [25] FALSE FALSE FALSE  TRUE  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE FALSE
##  [37] FALSE  TRUE FALSE  TRUE  TRUE FALSE FALSE  TRUE FALSE FALSE  TRUE  TRUE
##  [49]  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [61] FALSE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE
##  [73]  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE
##  [85]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE
##  [97] FALSE FALSE  TRUE  TRUE  TRUE FALSE FALSE  TRUE  TRUE  TRUE FALSE  TRUE
## [109]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE FALSE  TRUE
## [121]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [133]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [145]  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE

airquality.nona <- airquality[complete.cases(airquality),]
str(airquality.nona)

## 'data.frame':    111 obs. of  6 variables:
##  $ Ozone  : int  41 36 12 18 23 19 8 16 11 14 ...
##  $ Solar.R: int  190 118 149 313 299 99 19 256 290 274 ...
##  $ Wind   : num  7.4 8 12.6 11.5 8.6 13.8 20.1 9.7 9.2 10.9 ...
##  $ Temp   : int  67 72 74 62 65 59 61 69 66 68 ...
##  $ Month  : int  5 5 5 5 5 5 5 5 5 5 ...
##  $ Day    : int  1 2 3 4 7 8 9 12 13 14 ...

airquality.nona <- na.omit(airquality)
str(airquality.nona)

## 'data.frame':    111 obs. of  6 variables:
##  $ Ozone  : int  41 36 12 18 23 19 8 16 11 14 ...
##  $ Solar.R: int  190 118 149 313 299 99 19 256 290 274 ...
##  $ Wind   : num  7.4 8 12.6 11.5 8.6 13.8 20.1 9.7 9.2 10.9 ...
##  $ Temp   : int  67 72 74 62 65 59 61 69 66 68 ...
##  $ Month  : int  5 5 5 5 5 5 5 5 5 5 ...
##  $ Day    : int  1 2 3 4 7 8 9 12 13 14 ...
##  - attr(*, "na.action")= 'omit' Named int [1:42] 5 6 10 11 25 26 27 32 33 34 ...
##   ..- attr(*, "names")= chr [1:42] "5" "6" "10" "11" ...

cut(x = iris$Sepal.Width, breaks = c(0, 1, 2, 3 ,4, 5))

##   [1] (3,4] (2,3] (3,4] (3,4] (3,4] (3,4] (3,4] (3,4] (2,3] (3,4] (3,4] (3,4]
##  [13] (2,3] (2,3] (3,4] (4,5] (3,4] (3,4] (3,4] (3,4] (3,4] (3,4] (3,4] (3,4]
##  [25] (3,4] (2,3] (3,4] (3,4] (3,4] (3,4] (3,4] (3,4] (4,5] (4,5] (3,4] (3,4]
##  [37] (3,4] (3,4] (2,3] (3,4] (3,4] (2,3] (3,4] (3,4] (3,4] (2,3] (3,4] (3,4]
##  [49] (3,4] (3,4] (3,4] (3,4] (3,4] (2,3] (2,3] (2,3] (3,4] (2,3] (2,3] (2,3]
##  [61] (1,2] (2,3] (2,3] (2,3] (2,3] (3,4] (2,3] (2,3] (2,3] (2,3] (3,4] (2,3]
##  [73] (2,3] (2,3] (2,3] (2,3] (2,3] (2,3] (2,3] (2,3] (2,3] (2,3] (2,3] (2,3]
##  [85] (2,3] (3,4] (3,4] (2,3] (2,3] (2,3] (2,3] (2,3] (2,3] (2,3] (2,3] (2,3]
##  [97] (2,3] (2,3] (2,3] (2,3] (3,4] (2,3] (2,3] (2,3] (2,3] (2,3] (2,3] (2,3]
## [109] (2,3] (3,4] (3,4] (2,3] (2,3] (2,3] (2,3] (3,4] (2,3] (3,4] (2,3] (2,3]
## [121] (3,4] (2,3] (2,3] (2,3] (3,4] (3,4] (2,3] (2,3] (2,3] (2,3] (2,3] (3,4]
## [133] (2,3] (2,3] (2,3] (2,3] (3,4] (3,4] (2,3] (3,4] (3,4] (3,4] (2,3] (3,4]
## [145] (3,4] (2,3] (2,3] (2,3] (3,4] (2,3]
## Levels: (0,1] (1,2] (2,3] (3,4] (4,5]

cut(x = iris$Sepal.Width, breaks = 5)

##   [1] (3.44,3.92] (2.96,3.44] (2.96,3.44] (2.96,3.44] (3.44,3.92] (3.44,3.92]
##   [7] (2.96,3.44] (2.96,3.44] (2.48,2.96] (2.96,3.44] (3.44,3.92] (2.96,3.44]
##  [13] (2.96,3.44] (2.96,3.44] (3.92,4.4]  (3.92,4.4]  (3.44,3.92] (3.44,3.92]
##  [19] (3.44,3.92] (3.44,3.92] (2.96,3.44] (3.44,3.92] (3.44,3.92] (2.96,3.44]
##  [25] (2.96,3.44] (2.96,3.44] (2.96,3.44] (3.44,3.92] (2.96,3.44] (2.96,3.44]
##  [31] (2.96,3.44] (2.96,3.44] (3.92,4.4]  (3.92,4.4]  (2.96,3.44] (2.96,3.44]
##  [37] (3.44,3.92] (3.44,3.92] (2.96,3.44] (2.96,3.44] (3.44,3.92] (2,2.48]   
##  [43] (2.96,3.44] (3.44,3.92] (3.44,3.92] (2.96,3.44] (3.44,3.92] (2.96,3.44]
##  [49] (3.44,3.92] (2.96,3.44] (2.96,3.44] (2.96,3.44] (2.96,3.44] (2,2.48]   
##  [55] (2.48,2.96] (2.48,2.96] (2.96,3.44] (2,2.48]    (2.48,2.96] (2.48,2.96]
##  [61] (2,2.48]    (2.96,3.44] (2,2.48]    (2.48,2.96] (2.48,2.96] (2.96,3.44]
##  [67] (2.96,3.44] (2.48,2.96] (2,2.48]    (2.48,2.96] (2.96,3.44] (2.48,2.96]
##  [73] (2.48,2.96] (2.48,2.96] (2.48,2.96] (2.96,3.44] (2.48,2.96] (2.96,3.44]
##  [79] (2.48,2.96] (2.48,2.96] (2,2.48]    (2,2.48]    (2.48,2.96] (2.48,2.96]
##  [85] (2.96,3.44] (2.96,3.44] (2.96,3.44] (2,2.48]    (2.96,3.44] (2.48,2.96]
##  [91] (2.48,2.96] (2.96,3.44] (2.48,2.96] (2,2.48]    (2.48,2.96] (2.96,3.44]
##  [97] (2.48,2.96] (2.48,2.96] (2.48,2.96] (2.48,2.96] (2.96,3.44] (2.48,2.96]
## [103] (2.96,3.44] (2.48,2.96] (2.96,3.44] (2.96,3.44] (2.48,2.96] (2.48,2.96]
## [109] (2.48,2.96] (3.44,3.92] (2.96,3.44] (2.48,2.96] (2.96,3.44] (2.48,2.96]
## [115] (2.48,2.96] (2.96,3.44] (2.96,3.44] (3.44,3.92] (2.48,2.96] (2,2.48]   
## [121] (2.96,3.44] (2.48,2.96] (2.48,2.96] (2.48,2.96] (2.96,3.44] (2.96,3.44]
## [127] (2.48,2.96] (2.96,3.44] (2.48,2.96] (2.96,3.44] (2.48,2.96] (3.44,3.92]
## [133] (2.48,2.96] (2.48,2.96] (2.48,2.96] (2.96,3.44] (2.96,3.44] (2.96,3.44]
## [139] (2.96,3.44] (2.96,3.44] (2.96,3.44] (2.96,3.44] (2.48,2.96] (2.96,3.44]
## [145] (2.96,3.44] (2.96,3.44] (2.48,2.96] (2.96,3.44] (2.96,3.44] (2.96,3.44]
## Levels: (2,2.48] (2.48,2.96] (2.96,3.44] (3.44,3.92] (3.92,4.4]

iris.cut <- cut(x = iris$Sepal.Width, breaks = c(0, 1, 2, 3 ,4, 5))
table(iris.cut)

## iris.cut
## (0,1] (1,2] (2,3] (3,4] (4,5] 
##     0     1    82    64     3

summary(iris.cut)

## (0,1] (1,2] (2,3] (3,4] (4,5] 
##     0     1    82    64     3

iris.cut <- cut(x = iris$Sepal.Width,
                breaks = c(0, 1, 2, 3 ,4, 5),
                labels = c("Smaller", "Small", "Medium", "Big", "Bigger"))
iris.cut

##   [1] Big    Medium Big    Big    Big    Big    Big    Big    Medium Big   
##  [11] Big    Big    Medium Medium Big    Bigger Big    Big    Big    Big   
##  [21] Big    Big    Big    Big    Big    Medium Big    Big    Big    Big   
##  [31] Big    Big    Bigger Bigger Big    Big    Big    Big    Medium Big   
##  [41] Big    Medium Big    Big    Big    Medium Big    Big    Big    Big   
##  [51] Big    Big    Big    Medium Medium Medium Big    Medium Medium Medium
##  [61] Small  Medium Medium Medium Medium Big    Medium Medium Medium Medium
##  [71] Big    Medium Medium Medium Medium Medium Medium Medium Medium Medium
##  [81] Medium Medium Medium Medium Medium Big    Big    Medium Medium Medium
##  [91] Medium Medium Medium Medium Medium Medium Medium Medium Medium Medium
## [101] Big    Medium Medium Medium Medium Medium Medium Medium Medium Big   
## [111] Big    Medium Medium Medium Medium Big    Medium Big    Medium Medium
## [121] Big    Medium Medium Medium Big    Big    Medium Medium Medium Medium
## [131] Medium Big    Medium Medium Medium Medium Big    Big    Medium Big   
## [141] Big    Big    Medium Big    Big    Medium Medium Medium Big    Medium
## Levels: Smaller Small Medium Big Bigger

table(iris.cut)

## iris.cut
## Smaller   Small  Medium     Big  Bigger 
##       0       1      82      64       3

반복 적용

apply()

2D 행렬에서

x <- matrix(1:20, 4, 5)
x

##      [,1] [,2] [,3] [,4] [,5]
## [1,]    1    5    9   13   17
## [2,]    2    6   10   14   18
## [3,]    3    7   11   15   19
## [4,]    4    8   12   16   20

apply(X = x, MARGIN = 1, FUN = max)

## [1] 17 18 19 20

apply(X = x, MARGIN = 2, FUN = max)

## [1]  4  8 12 16 20

3D 배열에서

y <- array(1:24, c(4, 3, 2))
y

## , , 1
## 
##      [,1] [,2] [,3]
## [1,]    1    5    9
## [2,]    2    6   10
## [3,]    3    7   11
## [4,]    4    8   12
## 
## , , 2
## 
##      [,1] [,2] [,3]
## [1,]   13   17   21
## [2,]   14   18   22
## [3,]   15   19   23
## [4,]   16   20   24

apply(y, 1, paste, collapse = ",")

## [1] "1,5,9,13,17,21"  "2,6,10,14,18,22" "3,7,11,15,19,23" "4,8,12,16,20,24"

apply(y, 2, paste, collapse = ",")

## [1] "1,2,3,4,13,14,15,16"    "5,6,7,8,17,18,19,20"    "9,10,11,12,21,22,23,24"

apply(y, 3, paste, collapse = ",")

## [1] "1,2,3,4,5,6,7,8,9,10,11,12"          "13,14,15,16,17,18,19,20,21,22,23,24"

apply(y, c(1, 2), paste, collapse=",")

##      [,1]   [,2]   [,3]   
## [1,] "1,13" "5,17" "9,21" 
## [2,] "2,14" "6,18" "10,22"
## [3,] "3,15" "7,19" "11,23"
## [4,] "4,16" "8,20" "12,24"

4D 배열에서

Titanic

## , , Age = Child, Survived = No
## 
##       Sex
## Class  Male Female
##   1st     0      0
##   2nd     0      0
##   3rd    35     17
##   Crew    0      0
## 
## , , Age = Adult, Survived = No
## 
##       Sex
## Class  Male Female
##   1st   118      4
##   2nd   154     13
##   3rd   387     89
##   Crew  670      3
## 
## , , Age = Child, Survived = Yes
## 
##       Sex
## Class  Male Female
##   1st     5      1
##   2nd    11     13
##   3rd    13     14
##   Crew    0      0
## 
## , , Age = Adult, Survived = Yes
## 
##       Sex
## Class  Male Female
##   1st    57    140
##   2nd    14     80
##   3rd    75     76
##   Crew  192     20

str(Titanic)

##  'table' num [1:4, 1:2, 1:2, 1:2] 0 0 35 0 0 0 17 0 118 154 ...
##  - attr(*, "dimnames")=List of 4
##   ..$ Class   : chr [1:4] "1st" "2nd" "3rd" "Crew"
##   ..$ Sex     : chr [1:2] "Male" "Female"
##   ..$ Age     : chr [1:2] "Child" "Adult"
##   ..$ Survived: chr [1:2] "No" "Yes"

apply(Titanic, 1, sum)

##  1st  2nd  3rd Crew 
##  325  285  706  885

apply(Titanic, 4, sum)

##   No  Yes 
## 1490  711

apply(Titanic, "Class", sum)

##  1st  2nd  3rd Crew 
##  325  285  706  885

apply(Titanic, c(1, 4), sum)

##       Survived
## Class   No Yes
##   1st  122 203
##   2nd  167 118
##   3rd  528 178
##   Crew 673 212

lapply(), sapply(), mapply()

리스트에서

exams <- list(s20 = c(78, 89, 91, 85, 85, 87),
              s21 = c(85, 86, 97, 99, 90),
              s22 = c(98, 96, 89, 90, 93, 85, 92),
              s23 = c(98, 96, 91, 88, 93, 99)
              )
exams

## $s20
## [1] 78 89 91 85 85 87
## 
## $s21
## [1] 85 86 97 99 90
## 
## $s22
## [1] 98 96 89 90 93 85 92
## 
## $s23
## [1] 98 96 91 88 93 99

lapply(exams, length)

## $s20
## [1] 6
## 
## $s21
## [1] 5
## 
## $s22
## [1] 7
## 
## $s23
## [1] 6

sapply(exams, length)

## s20 s21 s22 s23 
##   6   5   7   6

sapply(exams, mean)

##      s20      s21      s22      s23 
## 85.83333 91.40000 91.85714 94.16667

sapply(exams, sd)

##      s20      s21      s22      s23 
## 4.490731 6.348228 4.375255 4.262237

sapply(exams, range)

##      s20 s21 s22 s23
## [1,]  78  85  85  88
## [2,]  91  99  98  99

데이터프레임에서

head(iris)

##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa

lapply(iris, class)

## $Sepal.Length
## [1] "numeric"
## 
## $Sepal.Width
## [1] "numeric"
## 
## $Petal.Length
## [1] "numeric"
## 
## $Petal.Width
## [1] "numeric"
## 
## $Species
## [1] "factor"

sapply(iris, class)

## Sepal.Length  Sepal.Width Petal.Length  Petal.Width      Species 
##    "numeric"    "numeric"    "numeric"    "numeric"     "factor"

sapply(iris, mean)

## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA

## Sepal.Length  Sepal.Width Petal.Length  Petal.Width      Species 
##     5.843333     3.057333     3.758000     1.199333           NA

# A plain if/else suits the single TRUE/FALSE from is.numeric(x); ifelse() is
# meant for element-wise tests on vectors.
sapply(iris, function(x) if (is.numeric(x)) mean(x) else NA)

## Sepal.Length  Sepal.Width Petal.Length  Petal.Width      Species 
##     5.843333     3.057333     3.758000     1.199333           NA

mapply(rep, 1:4, 4:1)

## [[1]]
## [1] 1 1 1 1
## 
## [[2]]
## [1] 2 2 2
## 
## [[3]]
## [1] 3 3
## 
## [[4]]
## [1] 4

집단 요약

sapply()

data(mtcars)
head(mtcars)

##                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1

# Recode the 0/1 transmission flag as a labelled factor; the column keeps its
# position in the data frame.
mtcars$am <- factor(mtcars$am,
                    levels = c(0, 1),
                    labels = c("Automatic", "Manual"))
head(mtcars)

##                    mpg cyl disp  hp drat    wt  qsec vs        am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0    Manual    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0    Manual    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1    Manual    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1 Automatic    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0 Automatic    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1 Automatic    3    1

g <- split(x = mtcars$mpg, f = mtcars$am)
g

## $Automatic
##  [1] 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2 10.4 10.4 14.7 21.5
## [16] 15.5 15.2 13.3 19.2
## 
## $Manual
##  [1] 21.0 21.0 22.8 32.4 30.4 33.9 27.3 26.0 30.4 15.8 19.7 15.0 21.4

mean(g[[1]])

## [1] 17.14737

mean(g[["Manual"]])

## [1] 24.39231

sapply(g, mean)

## Automatic    Manual 
##  17.14737  24.39231

unstack(data.frame(mtcars$mpg, mtcars$am))

## $Automatic
##  [1] 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2 10.4 10.4 14.7 21.5
## [16] 15.5 15.2 13.3 19.2
## 
## $Manual
##  [1] 21.0 21.0 22.8 32.4 30.4 33.9 27.3 26.0 30.4 15.8 19.7 15.0 21.4

unstack()

head(iris)

##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa

gg <- unstack(data.frame(iris$Sepal.Length, iris$Species))
head(gg)

##   setosa versicolor virginica
## 1    5.1        7.0       6.3
## 2    4.9        6.4       5.8
## 3    4.7        6.9       7.1
## 4    4.6        5.5       6.3
## 5    5.0        6.5       6.5
## 6    5.4        5.7       7.6

str(gg)

## 'data.frame':    50 obs. of  3 variables:
##  $ setosa    : num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##  $ versicolor: num  7 6.4 6.9 5.5 6.5 5.7 6.3 4.9 6.6 5.2 ...
##  $ virginica : num  6.3 5.8 7.1 6.3 6.5 7.6 4.9 7.3 6.7 7.2 ...

summary(gg)

##      setosa        versicolor      virginica    
##  Min.   :4.300   Min.   :4.900   Min.   :4.900  
##  1st Qu.:4.800   1st Qu.:5.600   1st Qu.:6.225  
##  Median :5.000   Median :5.900   Median :6.500  
##  Mean   :5.006   Mean   :5.936   Mean   :6.588  
##  3rd Qu.:5.200   3rd Qu.:6.300   3rd Qu.:6.900  
##  Max.   :5.800   Max.   :7.000   Max.   :7.900

tapply()

head(iris)

##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa

# Group Sepal.Length by Species and apply FUN (here the mean) to each group.
tapply(X = iris$Sepal.Length, INDEX = iris$Species, FUN=mean)

##     setosa versicolor  virginica 
##      5.006      5.936      6.588

tapply(X = iris$Sepal.Length, INDEX = iris$Species, FUN=length)

##     setosa versicolor  virginica 
##         50         50         50

head(mtcars)

##                    mpg cyl disp  hp drat    wt  qsec vs        am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0    Manual    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0    Manual    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1    Manual    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1 Automatic    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0 Automatic    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1 Automatic    3    1

# Passing a list of two grouping variables as INDEX yields a cyl-by-am
# matrix of group means.
with(mtcars, tapply(mpg, list(cyl, am), mean))

##   Automatic   Manual
## 4    22.900 28.07500
## 6    19.125 20.56667
## 8    15.050 15.40000

# Same cross-tabulated means, but naming the list elements labels the row
# and column dimensions in the printed result.
with(mtcars, tapply(mpg,
                    list(
                      Cylinder = cyl,
                      Transmission = am),
                    mean)
     )

##         Transmission
## Cylinder Automatic   Manual
##        4    22.900 28.07500
##        6    19.125 20.56667
##        8    15.050 15.40000

aggregate()

head(mtcars)

##                    mpg cyl disp  hp drat    wt  qsec vs        am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0    Manual    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0    Manual    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1    Manual    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1 Automatic    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0 Automatic    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1 Automatic    3    1

# aggregate() returns a data frame with one row per cyl/am combination;
# unnamed grouping variables are labelled Group.1, Group.2.
with(mtcars, aggregate(x = mpg, by = list(cyl, am), FUN = mean))

##   Group.1   Group.2        x
## 1       4 Automatic 22.90000
## 2       6 Automatic 19.12500
## 3       8 Automatic 15.05000
## 4       4    Manual 28.07500
## 5       6    Manual 20.56667
## 6       8    Manual 15.40000

# Aggregate the first six columns at once. Naming the list elements sets the
# grouping column names. `cyl` is also one of the aggregated columns, so it
# appears a second time in the result.
aggregate(mtcars[c(1:6)],
          list(Group.cyl = mtcars$cyl, Group.am = mtcars$am),
          mean)

##   Group.cyl  Group.am      mpg cyl     disp        hp     drat       wt
## 1         4 Automatic 22.90000   4 135.8667  84.66667 3.770000 2.935000
## 2         6 Automatic 19.12500   6 204.5500 115.25000 3.420000 3.388750
## 3         8 Automatic 15.05000   8 357.6167 194.16667 3.120833 4.104083
## 4         4    Manual 28.07500   4  93.6125  81.87500 4.183750 2.042250
## 5         6    Manual 20.56667   6 155.0000 131.66667 3.806667 2.755000
## 6         8    Manual 15.40000   8 326.0000 299.50000 3.880000 3.370000

aggregate(iris[1:4], list(Species = iris$Species), mean)

##      Species Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1     setosa        5.006       3.428        1.462       0.246
## 2 versicolor        5.936       2.770        4.260       1.326
## 3  virginica        6.588       2.974        5.552       2.026

by()

# Apply summary() separately to the rows belonging to each species.
by(data = iris, INDICES = iris$Species, FUN = summary)

## iris$Species: setosa
##   Sepal.Length    Sepal.Width     Petal.Length    Petal.Width   
##  Min.   :4.300   Min.   :2.300   Min.   :1.000   Min.   :0.100  
##  1st Qu.:4.800   1st Qu.:3.200   1st Qu.:1.400   1st Qu.:0.200  
##  Median :5.000   Median :3.400   Median :1.500   Median :0.200  
##  Mean   :5.006   Mean   :3.428   Mean   :1.462   Mean   :0.246  
##  3rd Qu.:5.200   3rd Qu.:3.675   3rd Qu.:1.575   3rd Qu.:0.300  
##  Max.   :5.800   Max.   :4.400   Max.   :1.900   Max.   :0.600  
##        Species  
##  setosa    :50  
##  versicolor: 0  
##  virginica : 0  
##                 
##                 
##                 
## ------------------------------------------------------------ 
## iris$Species: versicolor
##   Sepal.Length    Sepal.Width     Petal.Length   Petal.Width          Species  
##  Min.   :4.900   Min.   :2.000   Min.   :3.00   Min.   :1.000   setosa    : 0  
##  1st Qu.:5.600   1st Qu.:2.525   1st Qu.:4.00   1st Qu.:1.200   versicolor:50  
##  Median :5.900   Median :2.800   Median :4.35   Median :1.300   virginica : 0  
##  Mean   :5.936   Mean   :2.770   Mean   :4.26   Mean   :1.326                  
##  3rd Qu.:6.300   3rd Qu.:3.000   3rd Qu.:4.60   3rd Qu.:1.500                  
##  Max.   :7.000   Max.   :3.400   Max.   :5.10   Max.   :1.800                  
## ------------------------------------------------------------ 
## iris$Species: virginica
##   Sepal.Length    Sepal.Width     Petal.Length    Petal.Width   
##  Min.   :4.900   Min.   :2.200   Min.   :4.500   Min.   :1.400  
##  1st Qu.:6.225   1st Qu.:2.800   1st Qu.:5.100   1st Qu.:1.800  
##  Median :6.500   Median :3.000   Median :5.550   Median :2.000  
##  Mean   :6.588   Mean   :2.974   Mean   :5.552   Mean   :2.026  
##  3rd Qu.:6.900   3rd Qu.:3.175   3rd Qu.:5.875   3rd Qu.:2.300  
##  Max.   :7.900   Max.   :3.800   Max.   :6.900   Max.   :2.500  
##        Species  
##  setosa    : 0  
##  versicolor: 0  
##  virginica :50  
##                 
##                 
##

table()

table(mtcars$gear)

## 
##  3  4  5 
## 15 12  5

table(mtcars$am)

## 
## Automatic    Manual 
##        19        13

table(mtcars$am, mtcars$gear)

##            
##              3  4  5
##   Automatic 15  4  0
##   Manual     0  8  5

cut()

# A single number for `breaks` cuts the range of mpg into that many
# equal-width intervals; table() then counts the cars in each interval.
mpg.cut <- cut(mtcars$mpg, breaks = 5)
table(mpg.cut)

## mpg.cut
## (10.4,15.1] (15.1,19.8] (19.8,24.5] (24.5,29.2] (29.2,33.9] 
##           6          12           8           2           4

분할, 적용, 결합

# install.packages("dplyr")
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

head(airquality)

##   Ozone Solar.R Wind Temp Month Day
## 1    41     190  7.4   67     5   1
## 2    36     118  8.0   72     5   2
## 3    12     149 12.6   74     5   3
## 4    18     313 11.5   62     5   4
## 5    NA      NA 14.3   56     5   5
## 6    28      NA 14.9   66     5   6

str(airquality)

## 'data.frame':    153 obs. of  6 variables:
##  $ Ozone  : int  41 36 12 18 NA 28 23 19 8 NA ...
##  $ Solar.R: int  190 118 149 313 NA NA 299 99 19 194 ...
##  $ Wind   : num  7.4 8 12.6 11.5 14.3 14.9 8.6 13.8 20.1 8.6 ...
##  $ Temp   : int  67 72 74 62 56 66 65 59 61 69 ...
##  $ Month  : int  5 5 5 5 5 5 5 5 5 5 ...
##  $ Day    : int  1 2 3 4 5 6 7 8 9 10 ...

filter()

# Keep only the rows where Month is 6. The result's rows are renumbered
# from 1.
filter(airquality, Month == 6)

##    Ozone Solar.R Wind Temp Month Day
## 1     NA     286  8.6   78     6   1
## 2     NA     287  9.7   74     6   2
## 3     NA     242 16.1   67     6   3
## 4     NA     186  9.2   84     6   4
## 5     NA     220  8.6   85     6   5
## 6     NA     264 14.3   79     6   6
## 7     29     127  9.7   82     6   7
## 8     NA     273  6.9   87     6   8
## 9     71     291 13.8   90     6   9
## 10    39     323 11.5   87     6  10
## 11    NA     259 10.9   93     6  11
## 12    NA     250  9.2   92     6  12
## 13    23     148  8.0   82     6  13
## 14    NA     332 13.8   80     6  14
## 15    NA     322 11.5   79     6  15
## 16    21     191 14.9   77     6  16
## 17    37     284 20.7   72     6  17
## 18    20      37  9.2   65     6  18
## 19    12     120 11.5   73     6  19
## 20    13     137 10.3   76     6  20
## 21    NA     150  6.3   77     6  21
## 22    NA      59  1.7   76     6  22
## 23    NA      91  4.6   76     6  23
## 24    NA     250  6.3   76     6  24
## 25    NA     135  8.0   75     6  25
## 26    NA     127  8.0   78     6  26
## 27    NA      47 10.3   73     6  27
## 28    NA      98 11.5   80     6  28
## 29    NA      31 14.9   77     6  29
## 30    NA     138  8.0   83     6  30

# Base R logical indexing selects the same rows but keeps the original row
# names (32 to 61).
airquality[airquality$Month == 6,]

##    Ozone Solar.R Wind Temp Month Day
## 32    NA     286  8.6   78     6   1
## 33    NA     287  9.7   74     6   2
## 34    NA     242 16.1   67     6   3
## 35    NA     186  9.2   84     6   4
## 36    NA     220  8.6   85     6   5
## 37    NA     264 14.3   79     6   6
## 38    29     127  9.7   82     6   7
## 39    NA     273  6.9   87     6   8
## 40    71     291 13.8   90     6   9
## 41    39     323 11.5   87     6  10
## 42    NA     259 10.9   93     6  11
## 43    NA     250  9.2   92     6  12
## 44    23     148  8.0   82     6  13
## 45    NA     332 13.8   80     6  14
## 46    NA     322 11.5   79     6  15
## 47    21     191 14.9   77     6  16
## 48    37     284 20.7   72     6  17
## 49    20      37  9.2   65     6  18
## 50    12     120 11.5   73     6  19
## 51    13     137 10.3   76     6  20
## 52    NA     150  6.3   77     6  21
## 53    NA      59  1.7   76     6  22
## 54    NA      91  4.6   76     6  23
## 55    NA     250  6.3   76     6  24
## 56    NA     135  8.0   75     6  25
## 57    NA     127  8.0   78     6  26
## 58    NA      47 10.3   73     6  27
## 59    NA      98 11.5   80     6  28
## 60    NA      31 14.9   77     6  29
## 61    NA     138  8.0   83     6  30

subset(airquality, subset = (Month == 6))

##    Ozone Solar.R Wind Temp Month Day
## 32    NA     286  8.6   78     6   1
## 33    NA     287  9.7   74     6   2
## 34    NA     242 16.1   67     6   3
## 35    NA     186  9.2   84     6   4
## 36    NA     220  8.6   85     6   5
## 37    NA     264 14.3   79     6   6
## 38    29     127  9.7   82     6   7
## 39    NA     273  6.9   87     6   8
## 40    71     291 13.8   90     6   9
## 41    39     323 11.5   87     6  10
## 42    NA     259 10.9   93     6  11
## 43    NA     250  9.2   92     6  12
## 44    23     148  8.0   82     6  13
## 45    NA     332 13.8   80     6  14
## 46    NA     322 11.5   79     6  15
## 47    21     191 14.9   77     6  16
## 48    37     284 20.7   72     6  17
## 49    20      37  9.2   65     6  18
## 50    12     120 11.5   73     6  19
## 51    13     137 10.3   76     6  20
## 52    NA     150  6.3   77     6  21
## 53    NA      59  1.7   76     6  22
## 54    NA      91  4.6   76     6  23
## 55    NA     250  6.3   76     6  24
## 56    NA     135  8.0   75     6  25
## 57    NA     127  8.0   78     6  26
## 58    NA      47 10.3   73     6  27
## 59    NA      98 11.5   80     6  28
## 60    NA      31 14.9   77     6  29
## 61    NA     138  8.0   83     6  30

# Conditions separated by commas are combined with AND.
air <- filter(airquality, Month == 6, Temp > 90)
air

##   Ozone Solar.R Wind Temp Month Day
## 1    NA     259 10.9   93     6  11
## 2    NA     250  9.2   92     6  12

air <- filter(airquality, Month == 6 & Temp > 90)
air

##   Ozone Solar.R Wind Temp Month Day
## 1    NA     259 10.9   93     6  11
## 2    NA     250  9.2   92     6  12

# `|` keeps rows that satisfy either condition. Rows whose Ozone is NA are
# kept only when Temp > 90, because filter() drops rows where the condition
# evaluates to NA.
air <- filter(airquality, Ozone > 80 | Temp > 90)
air

##    Ozone Solar.R Wind Temp Month Day
## 1    115     223  5.7   79     5  30
## 2     NA     259 10.9   93     6  11
## 3     NA     250  9.2   92     6  12
## 4    135     269  4.1   84     7   1
## 5     97     267  6.3   92     7   8
## 6     97     272  5.7   92     7   9
## 7     85     175  7.4   89     7  10
## 8     NA     291 14.9   91     7  14
## 9    108     223  8.0   85     7  25
## 10    82     213  7.4   88     7  28
## 11   122     255  4.0   89     8   7
## 12    89     229 10.3   90     8   8
## 13   110     207  8.0   90     8   9
## 14    NA     222  8.6   92     8  10
## 15   168     238  3.4   81     8  25
## 16    76     203  9.7   97     8  28
## 17   118     225  2.3   94     8  29
## 18    84     237  6.3   96     8  30
## 19    85     188  6.3   94     8  31
## 20    96     167  6.9   91     9   1
## 21    78     197  5.1   92     9   2
## 22    73     183  2.8   93     9   3
## 23    91     189  4.6   93     9   4

slice()

slice(airquality, 6:10)

##   Ozone Solar.R Wind Temp Month Day
## 1    28      NA 14.9   66     5   6
## 2    23     299  8.6   65     5   7
## 3    19      99 13.8   59     5   8
## 4     8      19 20.1   61     5   9
## 5    NA     194  8.6   69     5  10

slice(airquality, n())

##   Ozone Solar.R Wind Temp Month Day
## 1    20     223 11.5   68     9  30

# n() is the number of rows, so (n()-4):n() selects the last five rows.
slice(airquality, (n()-4):n())

##   Ozone Solar.R Wind Temp Month Day
## 1    30     193  6.9   70     9  26
## 2    NA     145 13.2   77     9  27
## 3    14     191 14.3   75     9  28
## 4    18     131  8.0   76     9  29
## 5    20     223 11.5   68     9  30

arrange()

# Sort ascending by Temp, breaking ties by Month and then by Day.
arrange(airquality, Temp, Month, Day) %>% 
  head(5)

##   Ozone Solar.R Wind Temp Month Day
## 1    NA      NA 14.3   56     5   5
## 2     6      78 18.4   57     5  18
## 3    NA      66 16.6   57     5  25
## 4    NA      NA  8.0   57     5  27
## 5    18      65 13.2   58     5  15

# desc() reverses the order for Temp only; ties are still broken by
# ascending Month and Day.
arrange(airquality, desc(Temp), Month, Day) %>% 
  head(5)

##   Ozone Solar.R Wind Temp Month Day
## 1    76     203  9.7   97     8  28
## 2    84     237  6.3   96     8  30
## 3   118     225  2.3   94     8  29
## 4    85     188  6.3   94     8  31
## 5    NA     259 10.9   93     6  11

select(), rename(), distinct()

select(airquality, Month, Day, Temp) %>% 
  head(5)

##   Month Day Temp
## 1     5   1   67
## 2     5   2   72
## 3     5   3   74
## 4     5   4   62
## 5     5   5   56

select(airquality, Temp:Day) %>% 
  head(5)

##   Temp Month Day
## 1   67     5   1
## 2   72     5   2
## 3   74     5   3
## 4   62     5   4
## 5   56     5   5

select(airquality, -(Temp:Day)) %>% 
  head(5)

##   Ozone Solar.R Wind
## 1    41     190  7.4
## 2    36     118  8.0
## 3    12     149 12.6
## 4    18     313 11.5
## 5    NA      NA 14.3

# select() with `new = old` renames the column but drops all the others.
select(airquality, Solar = Solar.R) %>% 
  head(5)

##   Solar
## 1   190
## 2   118
## 3   149
## 4   313
## 5    NA

# rename() changes the column name and keeps every other column.
rename(airquality, Solar = Solar.R) %>% 
  head(5)

##   Ozone Solar Wind Temp Month Day
## 1    41   190  7.4   67     5   1
## 2    36   118  8.0   72     5   2
## 3    12   149 12.6   74     5   3
## 4    18   313 11.5   62     5   4
## 5    NA    NA 14.3   56     5   5

distinct(select(airquality, Month))

##   Month
## 1     5
## 2     6
## 3     7
## 4     8
## 5     9

mutate()

# Temp.C converts Temp from Fahrenheit to Celsius. mutate() evaluates its
# arguments in order, so Diff can use the Temp.C column created earlier in
# the same call.
air <- mutate(airquality,
       Temp.C = (Temp - 32) / 1.8,
       Diff = Temp.C - mean(Temp.C))
head(air)

##   Ozone Solar.R Wind Temp Month Day   Temp.C       Diff
## 1    41     190  7.4   67     5   1 19.44444  -6.045752
## 2    36     118  8.0   72     5   2 22.22222  -3.267974
## 3    12     149 12.6   74     5   3 23.33333  -2.156863
## 4    18     313 11.5   62     5   4 16.66667  -8.823529
## 5    NA      NA 14.3   56     5   5 13.33333 -12.156863
## 6    28      NA 14.9   66     5   6 18.88889  -6.601307

# NOTE: unlike mutate(), transform() evaluates every expression against the
# original columns of the data frame, so the Temp.C created in this same call
# is not visible to Diff. This call fails with "object 'Temp.C' not found"
# unless an object named Temp.C already exists in the calling environment,
# which is presumably why no output is shown for it.
transform(airquality,
          Temp.C = (Temp - 32) / 1.8,
          Diff = Temp.C - mean(Temp.C))

air <- transform(airquality,
                 Temp.C = (Temp - 32) / 1.8)
head(air)

##   Ozone Solar.R Wind Temp Month Day   Temp.C
## 1    41     190  7.4   67     5   1 19.44444
## 2    36     118  8.0   72     5   2 22.22222
## 3    12     149 12.6   74     5   3 23.33333
## 4    18     313 11.5   62     5   4 16.66667
## 5    NA      NA 14.3   56     5   5 13.33333
## 6    28      NA 14.9   66     5   6 18.88889

summarise()

summarise(airquality,
          mean(Temp),
          median(Temp, na.rm = TRUE),
          sd(Temp, na.rm = TRUE),
          max(Temp, na.rm = TRUE),
          min(Temp, na.rm = TRUE))

##   mean(Temp) median(Temp, na.rm = TRUE) sd(Temp, na.rm = TRUE)
## 1   77.88235                         79                9.46527
##   max(Temp, na.rm = TRUE) min(Temp, na.rm = TRUE)
## 1                      97                      56

# Named arguments become the column names of the one-row result.
# n() counts rows and n_distinct() counts unique values. first() and last()
# return the first and last value of Month, despite the "Distinct." prefix
# used in those column names.
summarise(airquality,
          Mean = mean(Temp),
          Median = median(Temp, na.rm = TRUE),
          SD = sd(Temp, na.rm = TRUE),
          Max = max(Temp, na.rm = TRUE),
          Min = min(Temp, na.rm = TRUE),
          N = n(),
          Distinct.Month = n_distinct(Month),
          Distinct.First = first(Month),
          Distinct.Last = last(Month))

##       Mean Median      SD Max Min   N Distinct.Month Distinct.First
## 1 77.88235     79 9.46527  97  56 153              5              5
##   Distinct.Last
## 1             9

# Draw 5 rows at random. No seed is set, so the selected rows differ from
# run to run.
sample_n(airquality, 5)

##   Ozone Solar.R Wind Temp Month Day
## 1    14     274 10.9   68     5  14
## 2    13     137 10.3   76     6  20
## 3    80     294  8.6   86     7  24
## 4     1       8  9.7   59     5  21
## 5    65     157  9.7   80     8  14

sample_frac(airquality, 0.05, replace = TRUE)

##   Ozone Solar.R Wind Temp Month Day
## 1    27     175 14.9   81     7  13
## 2    23     299  8.6   65     5   7
## 3    10     264 14.3   73     7  12
## 4    61     285  6.3   84     7  18
## 5    NA     264 14.3   79     6   6
## 6    28     273 11.5   82     8  13
## 7    23     115  7.4   76     8  18
## 8    NA     242 16.1   67     6   3

group_by()

# group_by() leaves the data values unchanged; it records the grouping (and
# adds the grouped_df/tbl classes) so that later verbs work per Month.
air.group <- group_by(airquality, Month)
class(air.group)

## [1] "grouped_df" "tbl_df"     "tbl"        "data.frame"

air.group

## # A tibble: 153 x 6
## # Groups:   Month [5]
##    Ozone Solar.R  Wind  Temp Month   Day
##    <int>   <int> <dbl> <int> <int> <int>
##  1    41     190   7.4    67     5     1
##  2    36     118   8      72     5     2
##  3    12     149  12.6    74     5     3
##  4    18     313  11.5    62     5     4
##  5    NA      NA  14.3    56     5     5
##  6    28      NA  14.9    66     5     6
##  7    23     299   8.6    65     5     7
##  8    19      99  13.8    59     5     8
##  9     8      19  20.1    61     5     9
## 10    NA     194   8.6    69     5    10
## # ... with 143 more rows

summarise(air.group,
          Mean.Temp = mean(Temp, na.rm = TRUE))

## `summarise()` ungrouping output (override with `.groups` argument)

## # A tibble: 5 x 2
##   Month Mean.Temp
##   <int>     <dbl>
## 1     5      65.5
## 2     6      79.1
## 3     7      83.9
## 4     8      84.0
## 5     9      76.9

# On grouped data summarise() returns one row per group; n() here is the
# number of rows (days) in each month.
summarise(air.group,
          Mean.Temp = mean(Temp, na.rm = TRUE),
          SD.Temp = sd(Temp, na.rm = TRUE),
          Days = n())

## `summarise()` ungrouping output (override with `.groups` argument)

## # A tibble: 5 x 4
##   Month Mean.Temp SD.Temp  Days
##   <int>     <dbl>   <dbl> <int>
## 1     5      65.5    6.85    31
## 2     6      79.1    6.60    30
## 3     7      83.9    4.32    31
## 4     8      84.0    6.59    31
## 5     9      76.9    8.36    30

%>%

iris %>% head

##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa

head(iris)

##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa

group_by(), summarise()

# Step-by-step version: one intermediate object per verb
# (select columns -> group by month -> monthly means).
a1 <- select(airquality, Ozone, Temp, Month)
a2 <- group_by(a1, Month)
a3 <- summarise(a2,
                Mean.Ozone = mean(Ozone, na.rm = TRUE),
                Mean.Temp = mean(Temp, na.rm = TRUE))

## `summarise()` ungrouping output (override with `.groups` argument)

a4 <- filter(a3, Mean.Ozone > 40 | Mean.Temp > 80)
a4

## # A tibble: 2 x 3
##   Month Mean.Ozone Mean.Temp
##   <int>      <dbl>     <dbl>
## 1     7       59.1      83.9
## 2     8       60.0      84.0

# The same sequence of verbs written as a single %>% pipeline: each result
# is passed as the first argument of the next verb, so no intermediate
# objects are needed.
air <- airquality %>% 
  select(Ozone, Temp, Month) %>% 
  group_by(Month) %>% 
  summarise(Mean.Ozone = mean(Ozone, na.rm = TRUE),
            Mean.Temp = mean(Temp, na.rm = TRUE)) %>% 
  filter(Mean.Ozone > 40 | Mean.Temp > 80)

## `summarise()` ungrouping output (override with `.groups` argument)

air

## # A tibble: 2 x 3
##   Month Mean.Ozone Mean.Temp
##   <int>      <dbl>     <dbl>
## 1     7       59.1      83.9
## 2     8       60.0      84.0

형태 변환

library(reshape2)

library(reshape2)
smiths

##      subject time age weight height
## 1 John Smith    1  33     90   1.87
## 2 Mary Smith    1  NA     NA   1.54

melt()

# With neither id.vars nor measure.vars given, melt() picks the id variable
# itself (here `subject`, as the printed message reports) and stacks the
# remaining columns into variable/value pairs.
melt(data = smiths)

## Using subject as id variables

##      subject variable value
## 1 John Smith     time  1.00
## 2 Mary Smith     time  1.00
## 3 John Smith      age 33.00
## 4 Mary Smith      age    NA
## 5 John Smith   weight 90.00
## 6 Mary Smith   weight    NA
## 7 John Smith   height  1.87
## 8 Mary Smith   height  1.54

melt(data = smiths,
     id.vars = "subject")

##      subject variable value
## 1 John Smith     time  1.00
## 2 Mary Smith     time  1.00
## 3 John Smith      age 33.00
## 4 Mary Smith      age    NA
## 5 John Smith   weight 90.00
## 6 Mary Smith   weight    NA
## 7 John Smith   height  1.87
## 8 Mary Smith   height  1.54

melt(data = smiths,
     measure.vars = c(2:5))

##      subject variable value
## 1 John Smith     time  1.00
## 2 Mary Smith     time  1.00
## 3 John Smith      age 33.00
## 4 Mary Smith      age    NA
## 5 John Smith   weight 90.00
## 6 Mary Smith   weight    NA
## 7 John Smith   height  1.87
## 8 Mary Smith   height  1.54

melt(data = smiths,
     measure.vars = c("time", "age", "weight", "height"))

##      subject variable value
## 1 John Smith     time  1.00
## 2 Mary Smith     time  1.00
## 3 John Smith      age 33.00
## 4 Mary Smith      age    NA
## 5 John Smith   weight 90.00
## 6 Mary Smith   weight    NA
## 7 John Smith   height  1.87
## 8 Mary Smith   height  1.54

dcast()

# Melt to long form with custom names for the variable and value columns,
# then cast back to the original wide layout. value.var names the renamed
# value column ("val").
smiths.long <- melt(data = smiths,
     id.vars = "subject",
     measure.vars = c("time", "age", "weight", "height"),
     variable.name = "var",
     value.name = "val")
dcast(data = smiths.long, formula = subject ~ var,
      value.var = "val")

##      subject time age weight height
## 1 John Smith    1  33     90   1.87
## 2 Mary Smith    1  NA     NA   1.54

head(airquality)

##   Ozone Solar.R Wind Temp Month Day
## 1    41     190  7.4   67     5   1
## 2    36     118  8.0   72     5   2
## 3    12     149 12.6   74     5   3
## 4    18     313 11.5   62     5   4
## 5    NA      NA 14.3   56     5   5
## 6    28      NA 14.9   66     5   6

aq.long <- melt(airquality,
                id.vars = c("Month", "Day"))
head(aq.long)

##   Month Day variable value
## 1     5   1    Ozone    41
## 2     5   2    Ozone    36
## 3     5   3    Ozone    12
## 4     5   4    Ozone    18
## 5     5   5    Ozone    NA
## 6     5   6    Ozone    28

# Cast the long table back to wide form: one row per Month/Day and one
# column per measured variable. `value.var` names the column that holds the
# cell values.
aq.wide <- dcast(aq.long,
                 Month + Day ~ variable,
                 value.var = "value")
head(aq.wide)

##   Month Day Ozone Solar.R Wind Temp
## 1     5   1    41     190  7.4   67
## 2     5   2    36     118  8.0   72
## 3     5   3    12     149 12.6   74
## 4     5   4    18     313 11.5   62
## 5     5   5    NA      NA 14.3   56
## 6     5   6    28      NA 14.9   66

# Month alone does not identify a single value per cell, so dcast() has to
# aggregate; with no function supplied it falls back to length (a count).
dcast(aq.long, Month ~ variable)

## Aggregation function missing: defaulting to length

##   Month Ozone Solar.R Wind Temp
## 1     5    31      31   31   31
## 2     6    30      30   30   30
## 3     7    31      31   31   31
## 4     8    31      31   31   31
## 5     9    30      30   30   30

# Aggregate each Month/variable cell with mean(); the extra argument
# na.rm = TRUE is passed on to the aggregation function.
dcast(aq.long, Month ~ variable,
      fun.aggregate = mean, na.rm = TRUE)

##   Month    Ozone  Solar.R      Wind     Temp
## 1     5 23.61538 181.2963 11.622581 65.54839
## 2     6 29.44444 190.1667 10.266667 79.10000
## 3     7 59.11538 216.4839  8.941935 83.90323
## 4     8 59.96154 171.8571  8.793548 83.96774
## 5     9 31.44828 167.4333 10.180000 76.90000

library(tidyr)

gather()

library(tidyr)

## 
## Attaching package: 'tidyr'

## The following object is masked from 'package:reshape2':
## 
##     smiths

# Four equivalent ways of choosing the columns to gather (a range of names,
# exclusion of the id columns, positions, or explicit names). Each call
# overwrites aq.long with the same long-format result.
aq.long <- gather(airquality,
                  key = Factor,
                  value = Measurement,
                  Ozone:Temp)
aq.long <- gather(airquality,
                  key = Factor,
                  value = Measurement,
                  -Month, -Day)
aq.long <- gather(airquality,
                  key = Factor,
                  value = Measurement,
                  1:4)
aq.long <- gather(airquality,
                  key = Factor,
                  value = Measurement,
                  Ozone, Solar.R, Wind, Temp)

spread()

# Spread the long table back to wide form. The value columns come out in
# the order Ozone, Solar.R, Temp, Wind, which differs from the column order
# of the original airquality data frame.
spread(data = aq.long,
       key = Factor,
       value = Measurement)

##     Month Day Ozone Solar.R Temp Wind
## 1       5   1    41     190   67  7.4
## 2       5   2    36     118   72  8.0
## 3       5   3    12     149   74 12.6
## 4       5   4    18     313   62 11.5
## 5       5   5    NA      NA   56 14.3
## 6       5   6    28      NA   66 14.9
## 7       5   7    23     299   65  8.6
## 8       5   8    19      99   59 13.8
## 9       5   9     8      19   61 20.1
## 10      5  10    NA     194   69  8.6
## 11      5  11     7      NA   74  6.9
## 12      5  12    16     256   69  9.7
## 13      5  13    11     290   66  9.2
## 14      5  14    14     274   68 10.9
## 15      5  15    18      65   58 13.2
## 16      5  16    14     334   64 11.5
## 17      5  17    34     307   66 12.0
## 18      5  18     6      78   57 18.4
## 19      5  19    30     322   68 11.5
## 20      5  20    11      44   62  9.7
## 21      5  21     1       8   59  9.7
## 22      5  22    11     320   73 16.6
## 23      5  23     4      25   61  9.7
## 24      5  24    32      92   61 12.0
## 25      5  25    NA      66   57 16.6
## 26      5  26    NA     266   58 14.9
## 27      5  27    NA      NA   57  8.0
## 28      5  28    23      13   67 12.0
## 29      5  29    45     252   81 14.9
## 30      5  30   115     223   79  5.7
## 31      5  31    37     279   76  7.4
## 32      6   1    NA     286   78  8.6
## 33      6   2    NA     287   74  9.7
## 34      6   3    NA     242   67 16.1
## 35      6   4    NA     186   84  9.2
## 36      6   5    NA     220   85  8.6
## 37      6   6    NA     264   79 14.3
## 38      6   7    29     127   82  9.7
## 39      6   8    NA     273   87  6.9
## 40      6   9    71     291   90 13.8
## 41      6  10    39     323   87 11.5
## 42      6  11    NA     259   93 10.9
## 43      6  12    NA     250   92  9.2
## 44      6  13    23     148   82  8.0
## 45      6  14    NA     332   80 13.8
## 46      6  15    NA     322   79 11.5
## 47      6  16    21     191   77 14.9
## 48      6  17    37     284   72 20.7
## 49      6  18    20      37   65  9.2
## 50      6  19    12     120   73 11.5
## 51      6  20    13     137   76 10.3
## 52      6  21    NA     150   77  6.3
## 53      6  22    NA      59   76  1.7
## 54      6  23    NA      91   76  4.6
## 55      6  24    NA     250   76  6.3
## 56      6  25    NA     135   75  8.0
## 57      6  26    NA     127   78  8.0
## 58      6  27    NA      47   73 10.3
## 59      6  28    NA      98   80 11.5
## 60      6  29    NA      31   77 14.9
## 61      6  30    NA     138   83  8.0
## 62      7   1   135     269   84  4.1
## 63      7   2    49     248   85  9.2
## 64      7   3    32     236   81  9.2
## 65      7   4    NA     101   84 10.9
## 66      7   5    64     175   83  4.6
## 67      7   6    40     314   83 10.9
## 68      7   7    77     276   88  5.1
## 69      7   8    97     267   92  6.3
## 70      7   9    97     272   92  5.7
## 71      7  10    85     175   89  7.4
## 72      7  11    NA     139   82  8.6
## 73      7  12    10     264   73 14.3
## 74      7  13    27     175   81 14.9
## 75      7  14    NA     291   91 14.9
## 76      7  15     7      48   80 14.3
## 77      7  16    48     260   81  6.9
## 78      7  17    35     274   82 10.3
## 79      7  18    61     285   84  6.3
## 80      7  19    79     187   87  5.1
## 81      7  20    63     220   85 11.5
## 82      7  21    16       7   74  6.9
## 83      7  22    NA     258   81  9.7
## 84      7  23    NA     295   82 11.5
## 85      7  24    80     294   86  8.6
## 86      7  25   108     223   85  8.0
## 87      7  26    20      81   82  8.6
## 88      7  27    52      82   86 12.0
## 89      7  28    82     213   88  7.4
## 90      7  29    50     275   86  7.4
## 91      7  30    64     253   83  7.4
## 92      7  31    59     254   81  9.2
## 93      8   1    39      83   81  6.9
## 94      8   2     9      24   81 13.8
## 95      8   3    16      77   82  7.4
## 96      8   4    78      NA   86  6.9
## 97      8   5    35      NA   85  7.4
## 98      8   6    66      NA   87  4.6
## 99      8   7   122     255   89  4.0
## 100     8   8    89     229   90 10.3
## 101     8   9   110     207   90  8.0
## 102     8  10    NA     222   92  8.6
## 103     8  11    NA     137   86 11.5
## 104     8  12    44     192   86 11.5
## 105     8  13    28     273   82 11.5
## 106     8  14    65     157   80  9.7
## 107     8  15    NA      64   79 11.5
## 108     8  16    22      71   77 10.3
## 109     8  17    59      51   79  6.3
## 110     8  18    23     115   76  7.4
## 111     8  19    31     244   78 10.9
## 112     8  20    44     190   78 10.3
## 113     8  21    21     259   77 15.5
## 114     8  22     9      36   72 14.3
## 115     8  23    NA     255   75 12.6
## 116     8  24    45     212   79  9.7
## 117     8  25   168     238   81  3.4
## 118     8  26    73     215   86  8.0
## 119     8  27    NA     153   88  5.7
## 120     8  28    76     203   97  9.7
## 121     8  29   118     225   94  2.3
## 122     8  30    84     237   96  6.3
## 123     8  31    85     188   94  6.3
## 124     9   1    96     167   91  6.9
## 125     9   2    78     197   92  5.1
## 126     9   3    73     183   93  2.8
## 127     9   4    91     189   93  4.6
## 128     9   5    47      95   87  7.4
## 129     9   6    32      92   84 15.5
## 130     9   7    20     252   80 10.9
## 131     9   8    23     220   78 10.3
## 132     9   9    21     230   75 10.9
## 133     9  10    24     259   73  9.7
## 134     9  11    44     236   81 14.9
## 135     9  12    21     259   76 15.5
## 136     9  13    28     238   77  6.3
## 137     9  14     9      24   71 10.9
## 138     9  15    13     112   71 11.5
## 139     9  16    46     237   78  6.9
## 140     9  17    18     224   67 13.8
## 141     9  18    13      27   76 10.3
## 142     9  19    24     238   68 10.3
## 143     9  20    16     201   82  8.0
## 144     9  21    13     238   64 12.6
## 145     9  22    23      14   71  9.2
## 146     9  23    36     139   81 10.3
## 147     9  24     7      49   69 10.3
## 148     9  25    14      20   63 16.6
## 149     9  26    30     193   70  6.9
## 150     9  27    NA     145   77 13.2
## 151     9  28    14     191   75 14.3
## 152     9  29    18     131   76  8.0
## 153     9  30    20     223   68 11.5

# Reshape the long air-quality table back to wide form: every distinct value
# of Factor becomes its own column, filled from Measurement.
spread(aq.long, key = Factor, value = Measurement)

##     Month Day Ozone Solar.R Temp Wind
## 1       5   1    41     190   67  7.4
## 2       5   2    36     118   72  8.0
## 3       5   3    12     149   74 12.6
## 4       5   4    18     313   62 11.5
## 5       5   5    NA      NA   56 14.3
## 6       5   6    28      NA   66 14.9
## 7       5   7    23     299   65  8.6
## 8       5   8    19      99   59 13.8
## 9       5   9     8      19   61 20.1
## 10      5  10    NA     194   69  8.6
## 11      5  11     7      NA   74  6.9
## 12      5  12    16     256   69  9.7
## 13      5  13    11     290   66  9.2
## 14      5  14    14     274   68 10.9
## 15      5  15    18      65   58 13.2
## 16      5  16    14     334   64 11.5
## 17      5  17    34     307   66 12.0
## 18      5  18     6      78   57 18.4
## 19      5  19    30     322   68 11.5
## 20      5  20    11      44   62  9.7
## 21      5  21     1       8   59  9.7
## 22      5  22    11     320   73 16.6
## 23      5  23     4      25   61  9.7
## 24      5  24    32      92   61 12.0
## 25      5  25    NA      66   57 16.6
## 26      5  26    NA     266   58 14.9
## 27      5  27    NA      NA   57  8.0
## 28      5  28    23      13   67 12.0
## 29      5  29    45     252   81 14.9
## 30      5  30   115     223   79  5.7
## 31      5  31    37     279   76  7.4
## 32      6   1    NA     286   78  8.6
## 33      6   2    NA     287   74  9.7
## 34      6   3    NA     242   67 16.1
## 35      6   4    NA     186   84  9.2
## 36      6   5    NA     220   85  8.6
## 37      6   6    NA     264   79 14.3
## 38      6   7    29     127   82  9.7
## 39      6   8    NA     273   87  6.9
## 40      6   9    71     291   90 13.8
## 41      6  10    39     323   87 11.5
## 42      6  11    NA     259   93 10.9
## 43      6  12    NA     250   92  9.2
## 44      6  13    23     148   82  8.0
## 45      6  14    NA     332   80 13.8
## 46      6  15    NA     322   79 11.5
## 47      6  16    21     191   77 14.9
## 48      6  17    37     284   72 20.7
## 49      6  18    20      37   65  9.2
## 50      6  19    12     120   73 11.5
## 51      6  20    13     137   76 10.3
## 52      6  21    NA     150   77  6.3
## 53      6  22    NA      59   76  1.7
## 54      6  23    NA      91   76  4.6
## 55      6  24    NA     250   76  6.3
## 56      6  25    NA     135   75  8.0
## 57      6  26    NA     127   78  8.0
## 58      6  27    NA      47   73 10.3
## 59      6  28    NA      98   80 11.5
## 60      6  29    NA      31   77 14.9
## 61      6  30    NA     138   83  8.0
## 62      7   1   135     269   84  4.1
## 63      7   2    49     248   85  9.2
## 64      7   3    32     236   81  9.2
## 65      7   4    NA     101   84 10.9
## 66      7   5    64     175   83  4.6
## 67      7   6    40     314   83 10.9
## 68      7   7    77     276   88  5.1
## 69      7   8    97     267   92  6.3
## 70      7   9    97     272   92  5.7
## 71      7  10    85     175   89  7.4
## 72      7  11    NA     139   82  8.6
## 73      7  12    10     264   73 14.3
## 74      7  13    27     175   81 14.9
## 75      7  14    NA     291   91 14.9
## 76      7  15     7      48   80 14.3
## 77      7  16    48     260   81  6.9
## 78      7  17    35     274   82 10.3
## 79      7  18    61     285   84  6.3
## 80      7  19    79     187   87  5.1
## 81      7  20    63     220   85 11.5
## 82      7  21    16       7   74  6.9
## 83      7  22    NA     258   81  9.7
## 84      7  23    NA     295   82 11.5
## 85      7  24    80     294   86  8.6
## 86      7  25   108     223   85  8.0
## 87      7  26    20      81   82  8.6
## 88      7  27    52      82   86 12.0
## 89      7  28    82     213   88  7.4
## 90      7  29    50     275   86  7.4
## 91      7  30    64     253   83  7.4
## 92      7  31    59     254   81  9.2
## 93      8   1    39      83   81  6.9
## 94      8   2     9      24   81 13.8
## 95      8   3    16      77   82  7.4
## 96      8   4    78      NA   86  6.9
## 97      8   5    35      NA   85  7.4
## 98      8   6    66      NA   87  4.6
## 99      8   7   122     255   89  4.0
## 100     8   8    89     229   90 10.3
## 101     8   9   110     207   90  8.0
## 102     8  10    NA     222   92  8.6
## 103     8  11    NA     137   86 11.5
## 104     8  12    44     192   86 11.5
## 105     8  13    28     273   82 11.5
## 106     8  14    65     157   80  9.7
## 107     8  15    NA      64   79 11.5
## 108     8  16    22      71   77 10.3
## 109     8  17    59      51   79  6.3
## 110     8  18    23     115   76  7.4
## 111     8  19    31     244   78 10.9
## 112     8  20    44     190   78 10.3
## 113     8  21    21     259   77 15.5
## 114     8  22     9      36   72 14.3
## 115     8  23    NA     255   75 12.6
## 116     8  24    45     212   79  9.7
## 117     8  25   168     238   81  3.4
## 118     8  26    73     215   86  8.0
## 119     8  27    NA     153   88  5.7
## 120     8  28    76     203   97  9.7
## 121     8  29   118     225   94  2.3
## 122     8  30    84     237   96  6.3
## 123     8  31    85     188   94  6.3
## 124     9   1    96     167   91  6.9
## 125     9   2    78     197   92  5.1
## 126     9   3    73     183   93  2.8
## 127     9   4    91     189   93  4.6
## 128     9   5    47      95   87  7.4
## 129     9   6    32      92   84 15.5
## 130     9   7    20     252   80 10.9
## 131     9   8    23     220   78 10.3
## 132     9   9    21     230   75 10.9
## 133     9  10    24     259   73  9.7
## 134     9  11    44     236   81 14.9
## 135     9  12    21     259   76 15.5
## 136     9  13    28     238   77  6.3
## 137     9  14     9      24   71 10.9
## 138     9  15    13     112   71 11.5
## 139     9  16    46     237   78  6.9
## 140     9  17    18     224   67 13.8
## 141     9  18    13      27   76 10.3
## 142     9  19    24     238   68 10.3
## 143     9  20    16     201   82  8.0
## 144     9  21    13     238   64 12.6
## 145     9  22    23      14   71  9.2
## 146     9  23    36     139   81 10.3
## 147     9  24     7      49   69 10.3
## 148     9  25    14      20   63 16.6
## 149     9  26    30     193   70  6.9
## 150     9  27    NA     145   77 13.2
## 151     9  28    14     191   75 14.3
## 152     9  29    18     131   76  8.0
## 153     9  30    20     223   68 11.5

separate()

# Stack every measurement column of iris into Element/Measurement pairs,
# keeping Species as the identifier column.
iris.long <- iris %>%
  gather(key = Element, value = Measurement, -Species)
# Show the last six rows of the long table.
tail(iris.long, n = 6)

##       Species     Element Measurement
## 595 virginica Petal.Width         2.5
## 596 virginica Petal.Width         2.3
## 597 virginica Petal.Width         1.9
## 598 virginica Petal.Width         2.0
## 599 virginica Petal.Width         2.3
## 600 virginica Petal.Width         1.8

# Split the Element column into Part and Measures. No `sep` is supplied, so
# tidyr's default separator rule is used.
iris.sep <- iris.long %>%
  separate(col = Element, into = c("Part", "Measures"))
tail(iris.sep)

##       Species  Part Measures Measurement
## 595 virginica Petal    Width         2.5
## 596 virginica Petal    Width         2.3
## 597 virginica Petal    Width         1.9
## 598 virginica Petal    Width         2.0
## 599 virginica Petal    Width         2.3
## 600 virginica Petal    Width         1.8

unite()

# Paste Part and Measures back together into a single Factor column, joined
# with an underscore.
iris.unite <- iris.sep %>%
  unite(col = Factor, Part, Measures, sep = "_")
tail(iris.unite)

##       Species      Factor Measurement
## 595 virginica Petal_Width         2.5
## 596 virginica Petal_Width         2.3
## 597 virginica Petal_Width         1.9
## 598 virginica Petal_Width         2.0
## 599 virginica Petal_Width         2.3
## 600 virginica Petal_Width         1.8

ggplot2

Grammar of Graphics

library(ggplot2)

# Scatter plot of fuel economy (mpg) against weight (wt) for mtcars, with
# axis labels, title, subtitle and caption set through labs().
ggplot(data = mtcars, aes(x = wt, y = mpg)) +
  geom_point() +
  labs(x = "weight (1,000 lbs)",
       y = "Fuel Consumption (miles per gallon)",
       title = "Fuel Consumption vs. Weight",
       subtitle = "Negative relationship between fuel efficiency and car weight",
       caption = "Source: mtcars dataset") # the plotted data is mtcars, not mpg

# Histogram of mpg, faceted with cyl on the row side of the facet formula.
ggplot(data = mtcars, aes(x = mpg)) +
  geom_histogram() +
  facet_grid(cyl ~ .) +
  labs(title = "geom_histogram()",
       x = "Miles per Gallon")

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Recode cyl as a labelled factor so it can act as a discrete x, fill and
# color variable in the plots below. This overwrites the cyl column of the
# session's copy of mtcars.
mtcars$cyl <- factor(mtcars$cyl,
                     levels = c(4, 6, 8),
                     labels = c("4 cylinders", "6 cylinders", "8 cylinders"))
# Box plot of mpg for each cylinder group.
ggplot(data = mtcars, aes(x = cyl, y = mpg)) +
  geom_boxplot() +
  labs(title = "geom_boxplot()",
       x = "Number of Cylinders",
       y = "Miles per Gallon")

# Density of mpg, one filled curve per cylinder group.
ggplot(data = mtcars, aes(x = mpg, fill = cyl)) +
  geom_density() +
  labs(title = "geom_density()",
       x = "Miles per Gallon")

# Scatter plot of mpg against weight, colored by cylinder group.
ggplot(data = mtcars, aes(x = wt, y = mpg, col = cyl)) +
  geom_point() +
  labs(title = "geom_point()",
       x = "Weight (1,000 lbs)",
       y = "Miles per Gallon")

# Smoothed trend of mpg against weight; only the smoother layer is drawn
# (no points). No method is given, so ggplot2 chooses one itself.
ggplot(data = mtcars, aes(x = wt, y = mpg)) +
  geom_smooth() +
  labs(title = "geom_smooth()",
       x = "Weight (1,000 lbs)", # spacing matches the geom_point() example
       y = "Miles per Gallon")

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Line chart of the unemploy column of the economics data set over date.
ggplot(economics, mapping = aes(x = date, y = unemploy)) +
  geom_line() +
  labs(x = "Year",
       y = "Number of Unemployed (thousands)",
       title = "geom_line()")

geom()

# Layered scatter plot of mpg against weight: styled points, a dashed linear
# regression line, and a text label (the row name) for every car.
ggplot(data = mtcars, aes(x = wt, y = mpg)) +
  geom_point(shape = 21,
             color = "blue",
             bg = "skyblue", # interior color of the point
             size = 2,
             stroke = 1) + # thickness of the point outline
  geom_smooth(method = "lm", # smoothing method: linear regression
              color = "red",
              linetype = 2,
              size = 1) +
  geom_text(label = rownames(mtcars),
            hjust = 0,
            vjust = 0,
            nudge_y = 0.7, # shift labels upward, away from their points
            size = 2) +
  labs(x = "weight (1,000 lbs)",
       y = "Fuel Consumption (miles per gallon)",
       title = "Fuel Consumption vs. Weight",
       subtitle = "Negative relationship between fuel efficiency and car weight",
       caption = "Source: mtcars dataset") # the plotted data is mtcars, not mpg

## `geom_smooth()` using formula 'y ~ x'

geom_boxplot()

library(car)

## Loading required package: carData

## 
## Attaching package: 'car'

## The following object is masked from 'package:dplyr':
## 
##     recode

str(Salaries)

## 'data.frame':    397 obs. of  6 variables:
##  $ rank         : Factor w/ 3 levels "AsstProf","AssocProf",..: 3 3 1 3 3 2 3 3 3 3 ...
##  $ discipline   : Factor w/ 2 levels "A","B": 2 2 2 2 2 2 2 2 2 2 ...
##  $ yrs.since.phd: int  19 20 4 45 40 6 30 45 21 18 ...
##  $ yrs.service  : int  18 16 3 39 41 6 23 45 20 18 ...
##  $ sex          : Factor w/ 2 levels "Female","Male": 2 2 2 2 2 2 2 2 2 1 ...
##  $ salary       : int  139750 173200 79750 115000 141500 97000 175000 147765 119250 129000 ...

# Notched box plot of salary by rank, overlaid with the jittered raw
# observations and a rug of marks on one side of the panel.
ggplot(Salaries, aes(x = rank, y = salary)) +
  geom_boxplot(fill = "salmon",
               color = "dimgray",
               notch = TRUE) +
  geom_point(position = "jitter", # spread the points out
             color = "royalblue",
             alpha = 0.5) + # transparency
  geom_rug(sides = "l",
           color = "dimgray")

geom_violin()

library(lattice)
head(singer)

##   height voice.part
## 1     64  Soprano 1
## 2     62  Soprano 1
## 3     66  Soprano 1
## 4     65  Soprano 1
## 5     60  Soprano 1
## 6     61  Soprano 1

# Violin plot of singer height by voice part, with a narrow box plot drawn
# on top of each violin.
ggplot(data = singer, mapping = aes(x = voice.part, y = height)) +
  geom_violin(fill = "honeydew2") +
  geom_boxplot(width = 0.2, fill = "lightgreen")

geom_density()

library(car)
str(Salaries)

## 'data.frame':    397 obs. of  6 variables:
##  $ rank         : Factor w/ 3 levels "AsstProf","AssocProf",..: 3 3 1 3 3 2 3 3 3 3 ...
##  $ discipline   : Factor w/ 2 levels "A","B": 2 2 2 2 2 2 2 2 2 2 ...
##  $ yrs.since.phd: int  19 20 4 45 40 6 30 45 21 18 ...
##  $ yrs.service  : int  18 16 3 39 41 6 23 45 20 18 ...
##  $ sex          : Factor w/ 2 levels "Female","Male": 2 2 2 2 2 2 2 2 2 1 ...
##  $ salary       : int  139750 173200 79750 115000 141500 97000 175000 147765 119250 129000 ...

# Semi-transparent salary density curves, one filled curve per rank.
ggplot(Salaries, aes(x = salary, fill = rank)) +
  geom_density(alpha = 0.5)

# Salary against years since PhD; rank is mapped to color and sex to shape.
ggplot(Salaries, aes(x = yrs.since.phd,
                     y = salary,
                     color = rank,
                     shape = sex)) +
  geom_point()

# The next three bar charts share the same data and mapping (rank on x,
# filled by sex) and differ only in the position argument.
ggplot(Salaries, aes(x = rank, fill = sex)) +
  geom_bar(position = "stack")

ggplot(Salaries, aes(x = rank, fill = sex)) +
  geom_bar(position = "dodge")

# With position = "fill" the y axis is relabelled "Proportion".
ggplot(Salaries, aes(x = rank, fill = sex)) +
  geom_bar(position = "fill") + # available options: stack, dodge, fill
  labs(y = "Proportion")

geom_bar()

# Pre-tabulated grade counts: one row per grade with its frequency.
presummed <- data.frame(
  Grade = c("A", "B", "C", "D", "F"),
  Frequency = c(20, 40, 20, 10, 5)
)
print(presummed)

##   Grade Frequency
## 1     A        20
## 2     B        40
## 3     C        20
## 4     D        10
## 5     F         5

# presummed already holds one count per grade, so Frequency is mapped to y
# and geom_bar() is told to use the values as they are (stat = "identity").
ggplot(presummed, aes(x = Grade, y = Frequency)) +
  geom_bar(stat = "identity")

# Same data and mapping drawn with geom_col(), without a stat argument.
ggplot(presummed, aes(x = Grade, y = Frequency)) +
  geom_col()

facet_wrap(), facet_grid()

library(lattice)
head(singer)

##   height voice.part
## 1     64  Soprano 1
## 2     62  Soprano 1
## 3     66  Soprano 1
## 4     65  Soprano 1
## 5     60  Soprano 1
## 6     61  Soprano 1

# Histogram of singer height, one panel per voice part wrapped into 4 rows.
ggplot(singer, aes(x = height)) +
  geom_histogram() +
  facet_wrap(~ voice.part, nrow = 4)

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Height densities filled by voice part, faceted with voice.part on the row
# side of the facet formula.
ggplot(singer, aes(x = height, fill = voice.part)) +
  geom_density() +
  facet_grid(voice.part ~ .)

# Salary against years since PhD in a grid of panels: sex on the row side
# and rank on the column side of the facet formula.
ggplot(Salaries, aes(x = yrs.since.phd, y = salary)) +
  geom_point() +
  facet_grid(sex ~ rank)

# Same scatter with rank mapped to both color and shape, faceted with sex on
# the column side of the facet formula.
ggplot(Salaries, aes(x = yrs.since.phd,
                     y = salary,
                     color = rank,
                     shape = rank)) +
  geom_point() +
  facet_grid(. ~ sex)

scale_<aesthetic>_<name/data type>

library(car)
str(Salaries)

## 'data.frame':    397 obs. of  6 variables:
##  $ rank         : Factor w/ 3 levels "AsstProf","AssocProf",..: 3 3 1 3 3 2 3 3 3 3 ...
##  $ discipline   : Factor w/ 2 levels "A","B": 2 2 2 2 2 2 2 2 2 2 ...
##  $ yrs.since.phd: int  19 20 4 45 40 6 30 45 21 18 ...
##  $ yrs.service  : int  18 16 3 39 41 6 23 45 20 18 ...
##  $ sex          : Factor w/ 2 levels "Female","Male": 2 2 2 2 2 2 2 2 2 1 ...
##  $ salary       : int  139750 173200 79750 115000 141500 97000 175000 147765 119250 129000 ...

# Box plot of salary by rank and sex with relabelled axes: the rank levels
# get two-line display names and the salary breaks get "$...k" labels.
# The legend title is set through labs(fill = ...).
ggplot(Salaries, aes(x = rank, y = salary, fill = sex)) +
  geom_boxplot() +
  scale_x_discrete(breaks = c("AsstProf", "AssocProf", "Prof"),
                   labels = c("Assistant\nProfessor",
                              "Associate\nProfessor",
                              "Professor")) +
  scale_y_continuous(breaks = c(50000, 100000, 150000, 200000),
                     labels = c("$50k", "$100k", "$150k", "$200k")) +
  labs(fill = "Gender")

# Same plot, but the legend title is set through the fill scale and the
# legend position is given as a numeric coordinate pair in theme().
ggplot(Salaries, aes(x = rank, y = salary, fill = sex)) +
  geom_boxplot() +
  scale_x_discrete(breaks = c("AsstProf", "AssocProf", "Prof"),
                   labels = c("Assistant\nProfessor",
                              "Associate\nProfessor",
                              "Professor")) +
  scale_y_continuous(breaks = c(50000, 100000, 150000, 200000),
                     labels = c("$50k", "$100k", "$150k", "$200k")) +
  scale_fill_discrete(name = "Gender") + # legend title
  theme(legend.position = c(0.15, 0.75)) # legend position

# Reload the built-in mtcars data. Earlier in this file mtcars$cyl was
# overwritten with a labelled factor; this restores the original column.
data(mtcars)

# Cylinder count mapped to both shape and color; the shared legend title is
# set with labs().
ggplot(mtcars, aes(x = wt, y = mpg,
                   shape = factor(cyl),
                   color = factor(cyl)))  +
  geom_point() +
  labs(shape = "Cylinder",
       color = "Cylinder")

# Same plot, with the legend title set through the shape and color scales.
ggplot(mtcars, aes(x = wt, y = mpg,
                   shape = factor(cyl),
                   color = factor(cyl)))  +
  geom_point() +
  scale_shape_discrete(name = "Cylinder") +
  scale_color_discrete(name = "Cylinder")

# Point size mapped to displacement; the legend title is set with labs().
ggplot(mtcars, aes(x = wt, y = mpg, size = disp)) +
  geom_point(shape = 21,
             color = "black",
             fill = "wheat") +
  labs(size = "Engine\nDisplacement")

# Same plot, with the legend title set through the size scale.
ggplot(mtcars, aes(x = wt, y = mpg, size = disp)) +
  geom_point(shape = 21,
             color = "black",
             fill = "wheat") +
  scale_size_continuous(name = "Engine\nDisplacement")

# Bar chart of rank filled by sex, using two manually chosen fill colors.
ggplot(Salaries, aes(x = rank, fill = sex)) +
  geom_bar() +
  scale_fill_manual(values = c("tomato", "cornflowerblue"))

# Scatter colored by rank, using three manually chosen colors.
ggplot(Salaries, aes(x = yrs.since.phd, y = salary, color = rank)) +
  geom_point(size = 2) +
  scale_color_manual(values = c("orange", "violetred", "steelblue"))

# Same scatter, colored from the "Accent" brewer palette instead.
ggplot(Salaries, aes(x = yrs.since.phd, y = salary, color = rank)) +
  geom_point(size = 2) +
  scale_color_brewer(palette = "Accent")

# Continuous variable (disp) mapped to color, using scale_color_gradient2()
# with all of its default settings.
ggplot(mtcars, aes(x = wt, y = mpg, color = disp)) +
  geom_point() +
  scale_color_gradient2()

# Rank mapped to color and shape, with the three point shapes given
# explicitly by shape code.
ggplot(Salaries, aes(x = yrs.since.phd, y = salary,
                     color = rank,
                     shape = rank)) +
  geom_point(size = 2) +
  scale_shape_manual(values = c(15, 17, 19))

theme_<theme>

library(car)
str(Salaries)

## 'data.frame':    397 obs. of  6 variables:
##  $ rank         : Factor w/ 3 levels "AsstProf","AssocProf",..: 3 3 1 3 3 2 3 3 3 3 ...
##  $ discipline   : Factor w/ 2 levels "A","B": 2 2 2 2 2 2 2 2 2 2 ...
##  $ yrs.since.phd: int  19 20 4 45 40 6 30 45 21 18 ...
##  $ yrs.service  : int  18 16 3 39 41 6 23 45 20 18 ...
##  $ sex          : Factor w/ 2 levels "Female","Male": 2 2 2 2 2 2 2 2 2 1 ...
##  $ salary       : int  139750 173200 79750 115000 141500 97000 175000 147765 119250 129000 ...

# Faceted scatter drawn with a complete built-in theme in place of the
# default one.
ggplot(Salaries, aes(x = yrs.since.phd, y = salary,
                     color = rank,
                     shape = rank)) +
  geom_point() +
  facet_grid(. ~ sex) +
  theme_light() # default: theme_gray()

# Box plot styled element by element with theme(): title and axis text
# fonts, panel background, horizontal grid lines, and legend position.
ggplot(Salaries, aes(x = rank, y = salary, fill = sex)) +
  geom_boxplot() +
  labs(title = "Salary by Rank and Sex",
       x = "Rank",
       y = "Salary") +
  theme(plot.title = element_text(face = "bold.italic",
                                  size = 14,
                                  color = "brown"),
        axis.title = element_text(face = "bold.italic",
                                  size = 10,
                                  color = "tomato"),
        axis.text = element_text(face = "bold",
                                  size = 9,
                                  color = "royalblue"),
        panel.background = element_rect(fill = "snow",
                                        color = "darkblue"),
        panel.grid.major.y = element_line(color = "gray",
                                          linetype = "solid"),
        panel.grid.minor.y = element_line(color = "gray",
                                          linetype = "dashed"),
        legend.position = "top")

# The same theme settings stored in a variable so they can be added to any
# plot with `+ mytheme`.
mytheme <- theme(plot.title = element_text(face = "bold.italic",
                                  size = 14,
                                  color = "brown"),
        axis.title = element_text(face = "bold.italic",
                                  size = 10,
                                  color = "tomato"),
        axis.text = element_text(face = "bold",
                                  size = 9,
                                  color = "royalblue"),
        panel.background = element_rect(fill = "snow",
                                        color = "darkblue"),
        panel.grid.major.y = element_line(color = "gray",
                                          linetype = "solid"),
        panel.grid.minor.y = element_line(color = "gray",
                                          linetype = "dashed"),
        legend.position = "top")

library(lattice)
# Reuse the stored theme on a different data set (singer).
ggplot(singer, aes(x = voice.part, y = height)) +
  geom_boxplot() +
  labs(title = "Height by voice part",
       x = "Voice Part",
       y = "Height") +
  mytheme

library(gridExtra)

library(ggplot2)
library(car)

# Four plots of the Salaries data, assigned to variables instead of being
# drawn immediately.

# Bar chart of rank.
p1 <- ggplot(data = Salaries, mapping = aes(x = rank)) +
  geom_bar(fill = "steelblue")

# Histogram of salary.
p2 <- ggplot(data = Salaries, mapping = aes(x = salary)) +
  geom_histogram(fill = "maroon")

# Scatter plot of salary against years since PhD.
p3 <- ggplot(data = Salaries, mapping = aes(x = yrs.since.phd, y = salary)) +
  geom_point(color = "orange")

# Box plot of salary by rank.
p4 <- ggplot(data = Salaries, mapping = aes(x = rank, y = salary)) +
  geom_boxplot(fill = "mistyrose")

# install.packages("gridExtra")
library(gridExtra)

## 
## Attaching package: 'gridExtra'

## The following object is masked from 'package:dplyr':
## 
##     combine

# Lay out the four stored plots together in a 2 x 2 grid.
grid.arrange(p1, p2, p3, p4, nrow = 2, ncol = 2)

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Same arrangement, keeping the returned object so it can be passed to
# ggsave() as the plot to write.
myggplot <- grid.arrange(p1, p2, p3, p4, nrow = 2, ncol = 2)

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Write the arranged figure stored in myggplot to a PNG file. The first
# argument is spelled out as `filename`; the former `file =` only worked
# through partial argument matching.
ggsave(filename = "myplot.png",
       plot = myggplot,
       width = 7.0, # inches
       height = 5.5) # inches

# Draw a box plot of salary by rank.
ggplot(Salaries, aes(x = rank, y = salary)) +
  geom_boxplot()

# No `plot` argument is given, so ggsave() falls back to its default plot
# (the last plot displayed). `filename` is spelled out instead of relying on
# partial matching of `file`.
ggsave(filename = "myplot2.png")

## Saving 7 x 5 in image

# Open the help page for par.
?par
# List the color names available in R.
colors()
# Open the help page for RColorBrewer.
?RColorBrewer
# Further reference (Rcolor.pdf):
# http://www.stat.columbia.edu/~tzheng/files/Rcolor.pdf

질문은 여기

이 노트 관련 문의는 제이스’s 블로그 에 댓글로 부탁드립니다.

R 프로그래밍 기초

Jace

2020. 07. 31 (최근수정일)

R 프로그래밍 기초, 따라하며 배우기

벡터, 팩터

c(), seq(), rep(), str(), length()

연산자

벡터 함수

벡터 인덱싱

seq_along(), which(), names()

팩터

행렬, 배열

dim(), matrix()

rbind(), cbind()

연산

rowSums(), colSums(), rowMeans(), colMeans(), t()

인덱싱

colnames(), rownames()

배열

리스트

list(), names(), length(), unlist()

인덱싱

데이터프레임

data.frame()

rbind(), cbind()

do.call()

인덱싱

with(), within()

attach(), detach()

subset(), cor()

sqldf()

텍스트

nchar(), strsplit(), paste(), outer()

sprintf()

substr(), substring(), grep(), sub()

정규표현식

library(base)

grep(), grepl()

regexpr(), gregexpr(), regmatches()

sub(), gsub(), strsplit()

library(stringr)

str_detect()

str_locate(), str_locate_all(), str_extract(), str_extract_all()

str_match(), str_match_all(), str_replace(), str_replace_all()

str_split(), str_length(), str_count()

str_pad(), str_trim()

str_c()

파일 읽기

read.csv(), read.table(), read.fwf()

readLines(), scan()

library(openxlsx)

날짜

Sys.Date(), date(), Sys.time(), weekdays()

months(), quarters(), Sys.setlocale()

as.POSIXct(), as.POSIXlt(), strptime()

format(), ISOdate(), difftime()

함수

function(), ls(), rm()

논리흐름 제어

if(), ifelse()

switch()

repeat(), while(), for()

서브셋

subset()

sample(), set.seed()

duplicated(), which(), unique()

complete.cases(), na.omit(), cut()

반복 적용

apply()

2D 행렬에서

3D 배열에서

4D 배열에서

lapply(), sapply()

행렬, 배열에서

데이터프레임에서

집단 요약

sapply()

unstack()

tapply()

aggregate()