R 프로그래밍 기초, 따라하며 배우기

R의 기본을 되짚어 보기 위해 찾아본 강의 중 최고는 곽기영 교수님의 유튜브 강의입니다. R 프로그래밍 / R 기초 by 곽기영 on YouTube (우클릭해서 새 창으로 여세요). 확실하게 기본을 잡고자 한다면 강의를 보며 따라 하기를 강력 추천! 여기는 따라 하며 해 본 것들을 한 번에 훑어보기 위한 기록입니다.

이 노트 관련 문의는 제이스’s 블로그에 댓글로 부탁드립니다.

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see https://rmarkdown.rstudio.com.

벡터, 팩터

c(), seq(), rep(), str(), length()

c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
##  [1]  1  2  3  4  5  6  7  8  9 10
c("we", "love", "data", "analytics")
## [1] "we"        "love"      "data"      "analytics"
c(TRUE, FALSE, TRUE, FALSE)
## [1]  TRUE FALSE  TRUE FALSE
odd <- c(1, 3, 5)
even <- c(2, 4, 6)
odd
## [1] 1 3 5
even
## [1] 2 4 6
c(odd, even)
## [1] 1 3 5 2 4 6
3:9
## [1] 3 4 5 6 7 8 9
9:3
## [1] 9 8 7 6 5 4 3
5:-3
## [1]  5  4  3  2  1  0 -1 -2 -3
# seq() builds regular sequences: `by` sets the step size, `length.out` the
# number of evenly spaced points.
seq(from = 3, to = 9)
## [1] 3 4 5 6 7 8 9
seq(from = 3, to = 9, by = 2)
## [1] 3 5 7 9
# A negative `by` counts down, so `from` has to be larger than `to`.
seq(from = 3.5, to = 1.5, by = -0.5)
## [1] 3.5 3.0 2.5 2.0 1.5
seq(from = 0, to = 100, length.out = 5)
## [1]   0  25  50  75 100
seq(from = -1, to = 1, length.out = 5)
## [1] -1.0 -0.5  0.0  0.5  1.0
rep(1, times = 3)
## [1] 1 1 1
rep(c(1, 2, 3), times = 3)
## [1] 1 2 3 1 2 3 1 2 3
rep(c(1, 2, 3), each = 3)
## [1] 1 1 1 2 2 2 3 3 3
rep(c(1, 2, 3), times = c(1, 2, 3))
## [1] 1 2 2 3 3 3
rep(c(1, 2, 3), length.out = 8)
## [1] 1 2 3 1 2 3 1 2
num <- c(1, 2, 3)
cha <- c("x", "y", "z")
c(num, cha)
## [1] "1" "2" "3" "x" "y" "z"
str(num)
##  num [1:3] 1 2 3
str(cha)
##  chr [1:3] "x" "y" "z"
length(num)
## [1] 3
LETTERS
##  [1] "A" "B" "C" "D" "E" "F" "G" "H" "I" "J" "K" "L" "M" "N" "O" "P" "Q" "R" "S"
## [20] "T" "U" "V" "W" "X" "Y" "Z"
letters
##  [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o" "p" "q" "r" "s"
## [20] "t" "u" "v" "w" "x" "y" "z"
month.name
##  [1] "January"   "February"  "March"     "April"     "May"       "June"     
##  [7] "July"      "August"    "September" "October"   "November"  "December"
month.abb
##  [1] "Jan" "Feb" "Mar" "Apr" "May" "Jun" "Jul" "Aug" "Sep" "Oct" "Nov" "Dec"
pi
## [1] 3.141593
month <- c(12, 9, 3, 5, 1)
month
## [1] 12  9  3  5  1
month.name[month]
## [1] "December"  "September" "March"     "May"       "January"

연산자

1 + 2
## [1] 3
"+"(1, 2)
## [1] 3
c(1, 2, 3) + c(4, 5, 6)
## [1] 5 7 9
c(1, 2, 3) * c(4, 5, 6)
## [1]  4 10 18
c(10, 20, 30) / c(2, 4, 6)
## [1] 5 5 5
c(10, 20, 30) %% c(3, 5, 7)
## [1] 1 0 2
c(10, 20, 30) %/% c(3, 5, 7)
## [1] 3 4 4
c(1, 2, 3) + c(4, 5, 6, 7, 8, 9)
## [1]  5  7  9  8 10 12
c(1, 2, 3, 1, 2, 3) + c(4, 5, 6, 7, 8, 9)
## [1]  5  7  9  8 10 12
c(1, 3, 5) + 10
## [1] 11 13 15
c(1, 2, 3) + c(4, 5, 6, 7, 8)
## Warning in c(1, 2, 3) + c(4, 5, 6, 7, 8): 두 객체의 길이가 서로 배수관계에 있지
## 않습니다
## [1]  5  7  9  8 10
v <- pi
w <- 10 / 3

v == w
## [1] FALSE
v != w
## [1] TRUE
v > w
## [1] FALSE
v < w
## [1] TRUE
!(v > w)
## [1] TRUE
(v == w) | (v < w)
## [1] TRUE
(v == w) & (v < w)
## [1] FALSE
isTRUE(v == w)
## [1] FALSE
y <- c(0, 25, 50, 75, 100)
z <- c(50, 50, 50, 50, 50)
y == z
## [1] FALSE FALSE  TRUE FALSE FALSE
y != z
## [1]  TRUE  TRUE FALSE  TRUE  TRUE
y > z
## [1] FALSE FALSE FALSE  TRUE  TRUE
y < z
## [1]  TRUE  TRUE FALSE FALSE FALSE
y == 50
## [1] FALSE FALSE  TRUE FALSE FALSE
y > 50
## [1] FALSE FALSE FALSE  TRUE  TRUE
as.numeric(TRUE)
## [1] 1
as.numeric(FALSE)
## [1] 0
TRUE * TRUE
## [1] 1
TRUE * FALSE
## [1] 0
TRUE + TRUE
## [1] 2
y <-  c(0, 25, 50, 75, 100)
y > 50
## [1] FALSE FALSE FALSE  TRUE  TRUE
sum(y > 50)
## [1] 2
any(-3:3 > 0)
## [1] TRUE
all(-3:3 > 0)
## [1] FALSE
sqrt(2) ^ 2 == 2
## [1] FALSE
sqrt(2) ^ 2 - 2
## [1] 4.440892e-16
identical(sqrt(2) ^ 2 , 2)
## [1] FALSE
all.equal(sqrt(2) ^ 2 , 2)
## [1] TRUE
all.equal(sqrt(2) ^ 2, 3)
## [1] "Mean relative difference: 0.5"
isTRUE(all.equal(sqrt(2) ^ 2, 3))
## [1] FALSE
fruit <- c("Apple", "Banana", "Strawberry")
food <- c("Pie", "Juice", "Cake")
paste(fruit, food)
## [1] "Apple Pie"       "Banana Juice"    "Strawberry Cake"
paste(fruit, "Juice")
## [1] "Apple Juice"      "Banana Juice"     "Strawberry Juice"

벡터 함수

abs(-3:3)
## [1] 3 2 1 0 1 2 3
log(1:5) # 자연로그
## [1] 0.0000000 0.6931472 1.0986123 1.3862944 1.6094379
log(1:5, base = exp(1))
## [1] 0.0000000 0.6931472 1.0986123 1.3862944 1.6094379
log2(1:5)
## [1] 0.000000 1.000000 1.584963 2.000000 2.321928
log10(1:10)
##  [1] 0.0000000 0.3010300 0.4771213 0.6020600 0.6989700 0.7781513 0.8450980
##  [8] 0.9030900 0.9542425 1.0000000
exp(1:5) # e ^ 1:5
## [1]   2.718282   7.389056  20.085537  54.598150 148.413159
y <- exp(1:5)
y
## [1]   2.718282   7.389056  20.085537  54.598150 148.413159
log(y)
## [1] 1 2 3 4 5
factorial(1:5)
## [1]   1   2   6  24 120
choose(5, 2) # 조합
## [1] 10
sqrt(1:5)
## [1] 1.000000 1.414214 1.732051 2.000000 2.236068
# Printing precision and the rounding helpers: signif(), round(), floor(),
# ceiling(), trunc().
options("digits") # number of significant digits used when printing
## $digits
## [1] 7
pi
## [1] 3.141593
pi * 100
## [1] 314.1593
signif(456.789, digits = 2) # round to the given number of significant digits
## [1] 460
signif(456.789, digits = 3)
## [1] 457
signif(456.789, digits = 4)
## [1] 456.8
round(456.789, digits = 2) # round to the given number of decimal places
## [1] 456.79
round(456.789, digits = 1)
## [1] 456.8
round(456.789)
## [1] 457
round(sqrt(1:5), digits = 2)
## [1] 1.00 1.41 1.73 2.00 2.24
# Negative `digits` round to the left of the decimal point (hundreds, tens).
round(456.789, digits = -2)
## [1] 500
round(456.789, digits = -1)
## [1] 460
round(11.5) # an exact .5 goes to the nearest even integer
## [1] 12
round(10.5)
## [1] 10
round(12.5)
## [1] 12
round(13.5)
## [1] 14
round(-3.5)
## [1] -4
round(-4.5)
## [1] -4
floor(456.789) # round down
## [1] 456
floor(-456.789) # round down (away from zero for negatives)
## [1] -457
ceiling(456.789) # round up
## [1] 457
ceiling(-456.789) # round up (toward zero for negatives)
## [1] -456
trunc(456.789) # drop the fractional part (toward zero)
## [1] 456
trunc(-456.789) # drop the fractional part (toward zero)
## [1] -456
# Special values: Inf / -Inf (infinity), NaN (not a number), NA (missing).
3 / 0
## [1] Inf
5 - Inf
## [1] -Inf
Inf * Inf # values beyond roughly 1.8 * 10 ^ 308 are represented as Inf
## [1] Inf
Inf * -Inf
## [1] -Inf
# 10^305 .. 10^308 are still finite; 10^309 and 10^310 overflow to Inf.
is.infinite(10 ^ (305:310))
## [1] FALSE FALSE FALSE FALSE  TRUE  TRUE
Inf / Inf
## [1] NaN
Inf * 0
## [1] NaN
log(-2)
## Warning in log(-2): NaN이 생성되었습니다
## [1] NaN
NaN + 3 # NaN = Not a Number; it propagates through arithmetic
## [1] NaN
is.nan(NaN + 3)
## [1] TRUE
k <- NA # NA = Not Available (missing value)
k
## [1] NA
k + 5
## [1] NA
sqrt(k)
## [1] NA
is.na(k)
## [1] TRUE
is.na(k + 5)
## [1] TRUE
# is.na() is also TRUE for NaN.
is.na(NaN)
## [1] TRUE
z <- 1:5
z
## [1] 1 2 3 4 5
sum(z)
## [1] 15
prod(z)
## [1] 120
max(z)
## [1] 5
min(z)
## [1] 1
w <- c(1, 2, 3, 4, 5, NA)
sum(w, na.rm = TRUE)
## [1] 15
na.omit(w)
## [1] 1 2 3 4 5
## attr(,"na.action")
## [1] 6
## attr(,"class")
## [1] "omit"
sum(na.omit(w))
## [1] 15
v <- c(NA, NA, NA, NA, NA)
v
## [1] NA NA NA NA NA
sum(v, na.rm = TRUE)
## [1] 0
prod(v, na.rm = TRUE)
## [1] 1
max(v, na.rm = TRUE)
## Warning in max(v, na.rm = TRUE): max에 전달되는 인자들 중 누락이 있어 -Inf를 반
## 환합니다
## [1] -Inf
min(v, na.rm = TRUE)
## Warning in min(v, na.rm = TRUE): min에 전달되는 인자들 중 누락이 있어 Inf를 반환
## 합니다
## [1] Inf
traffic.death <- c(842, 729, 786, 751, 844, 851, 702)
cumsum(traffic.death) # 누적
## [1]  842 1571 2357 3108 3952 4803 5505
cumprod(traffic.death)
## [1] 8.420000e+02 6.138180e+05 4.824609e+08 3.623282e+11 3.058050e+14
## [6] 2.602400e+17 1.826885e+20
cummax(traffic.death)
## [1] 842 842 842 842 844 851 851
cummin(traffic.death)
## [1] 842 729 729 729 729 729 702
cumsum(c(3, 5, 1, NA, 2))
## [1]  3  8  9 NA NA
traffic.death
## [1] 842 729 786 751 844 851 702
diff(traffic.death)
## [1] -113   57  -35   93    7 -149
diff(c(3, 5, 1, NA, 2))
## [1]  2 -4 NA NA
diff(1:5, lag = 2) # 3-1, 4-2, 5-3
## [1] 2 2 2
p <- 1:10
q <- 6:15
union(p, q) # 합집합
##  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15
intersect(p, q) # 교집합
## [1]  6  7  8  9 10
setdiff(p, q) # 차집합
## [1] 1 2 3 4 5
setequal(p, q)
## [1] FALSE
is.element(setdiff(p, q), p) # 포함, 부분집합
## [1] TRUE TRUE TRUE TRUE TRUE
is.element(setdiff(p, q), q) # 포함, 부분집합
## [1] FALSE FALSE FALSE FALSE FALSE

벡터 인덱싱

num <- 0:30
num
##  [1]  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
## [26] 25 26 27 28 29 30
# Vector indexing with positive positions, repeated/reordered positions,
# and negative (exclusion) positions.
prime <- c(2, 3, 5, 7, 11, 13, 17, 19)
prime
## [1]  2  3  5  7 11 13 17 19
prime[1]
## [1] 2
prime[2]
## [1] 3
prime[1:3]
## [1] 2 3 5
prime[4:6]
## [1]  7 11 13
# Positions may repeat and may appear in any order.
prime[c(1, 1, 5, 5)]
## [1]  2  2 11 11
prime[c(1, 3, 5, 7)]
## [1]  2  5 11 17
prime[c(7, 5, 3, 1)]
## [1] 17 11  5  2
indices <- c(1, 3, 5, 7)
prime[indices]
## [1]  2  5 11 17
# Negative positions drop the given elements.
prime[-1]
## [1]  3  5  7 11 13 17 19
prime[-1:-3]
## [1]  7 11 13 17 19
prime[-(1:3)]
## [1]  7 11 13 17 19
length(prime)
## [1] 8
# All but the last element. `:` binds tighter than `-`, so
# `1:length(prime) - 1` would be `0:7` and only works because index 0 is
# silently ignored; seq_len() states the intended range 1..(n - 1) directly.
prime[seq_len(length(prime) - 1)]
## [1]  2  3  5  7 11 13 17
prime[-length(prime)]
## [1]  2  3  5  7 11 13 17
prime <- c(2, 4, 5, 7, 11, 14, 17, 18)
prime
## [1]  2  4  5  7 11 14 17 18
prime[2] <- 3
prime
## [1]  2  3  5  7 11 14 17 18
prime[c(6, 8)] <- c(13, 19)
prime
## [1]  2  3  5  7 11 13 17 19
prime <- c(2, 3, 5, 7, 11, 13, 17, 19)
prime
## [1]  2  3  5  7 11 13 17 19
length(prime)
## [1] 8
prime[9] <- 23
prime
## [1]  2  3  5  7 11 13 17 19 23
prime[c(10, 11)] <- c(29, 31)
prime
##  [1]  2  3  5  7 11 13 17 19 23 29 31
prime[15] <- 47
prime
##  [1]  2  3  5  7 11 13 17 19 23 29 31 NA NA NA 47
prime <- c(2, 3, 5, 7, 11, 13, 17, 19)
prime < 10
## [1]  TRUE  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE
prime[prime < 10]
## [1] 2 3 5 7
prime %% 2 == 0
## [1]  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
prime[prime %% 2 == 0]
## [1] 2

seq_along(), which(), names()

seq_along(prime)
## [1] 1 2 3 4 5 6 7 8
seq_along(prime) %% 2 == 0
## [1] FALSE  TRUE FALSE  TRUE FALSE  TRUE FALSE  TRUE
prime[seq_along(prime) %% 2 == 0]
## [1]  3  7 13 19
prime[seq_along(prime) %% 3 == 0]
## [1]  5 13
prime[c(FALSE, TRUE)]
## [1]  3  7 13 19
prime[c(FALSE, FALSE, TRUE)]
## [1]  5 13
rainfall <- c(21.6, 23.6, 45.8, 77.0, 102.2, 133.3, 327.9, 348.0,
              137.6, 49.3,53.0, 24.9)
rainfall
##  [1]  21.6  23.6  45.8  77.0 102.2 133.3 327.9 348.0 137.6  49.3  53.0  24.9
rainfall > 100
##  [1] FALSE FALSE FALSE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE FALSE
which(rainfall > 100)
## [1] 5 6 7 8 9
month.name[which(rainfall > 100)]
## [1] "May"       "June"      "July"      "August"    "September"
month.abb[which(rainfall > 100)]
## [1] "May" "Jun" "Jul" "Aug" "Sep"
which.max(rainfall)
## [1] 8
month.name[which.max(rainfall)]
## [1] "August"
month.name[which.min(rainfall)]
## [1] "January"
rainfall > 100
##  [1] FALSE FALSE FALSE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE FALSE
rainfall[rainfall > 100]
## [1] 102.2 133.3 327.9 348.0 137.6
rainfall[which.min(rainfall)]
## [1] 21.6
rainfall[which.max(rainfall)]
## [1] 348
traffic.death <- c(842, 729, 786, 751, 844, 851, 702)
traffic.death
## [1] 842 729 786 751 844 851 702
names(traffic.death) <- c("Mon", "Tue", "Wed", "Thu",
                          "Fri", "Sat", "Sun")
traffic.death
## Mon Tue Wed Thu Fri Sat Sun 
## 842 729 786 751 844 851 702
traffic.death["Sat"]
## Sat 
## 851
traffic.death[c("Tue", "Thu", "Sun")]
## Tue Thu Sun 
## 729 751 702
weekend <- c("Fri", "Sat", "Sun")
traffic.death[weekend]
## Fri Sat Sun 
## 844 851 702
traffic.death > 800
##   Mon   Tue   Wed   Thu   Fri   Sat   Sun 
##  TRUE FALSE FALSE FALSE  TRUE  TRUE FALSE
traffic.death[traffic.death > 800]
## Mon Fri Sat 
## 842 844 851
names(traffic.death[traffic.death > 800])
## [1] "Mon" "Fri" "Sat"

팩터

review <- c("Good", "Good", "Indifferent", "Bad", "Good", "Bad")
review
## [1] "Good"        "Good"        "Indifferent" "Bad"         "Good"       
## [6] "Bad"
review.factor <- factor(review)
review.factor
## [1] Good        Good        Indifferent Bad         Good        Bad        
## Levels: Bad Good Indifferent
review
## [1] "Good"        "Good"        "Indifferent" "Bad"         "Good"       
## [6] "Bad"
str(review)
##  chr [1:6] "Good" "Good" "Indifferent" "Bad" "Good" "Bad"
str(review.factor)
##  Factor w/ 3 levels "Bad","Good","Indifferent": 2 2 3 1 2 1
as.numeric(review.factor)
## [1] 2 2 3 1 2 1
eventday <- c("Mon", "Mon", "Tue", "Wed", "Mon",
              "Wed", "Thu", "Fri", "Tue")
eventday.factor <- factor(eventday)
eventday.factor
## [1] Mon Mon Tue Wed Mon Wed Thu Fri Tue
## Levels: Fri Mon Thu Tue Wed
eventday.factor <- factor(eventday,
                          levels = c("Mon", "Tue", "Wed", "Thu",
                                     "Fri", "Sat", "Sun"))
eventday.factor
## [1] Mon Mon Tue Wed Mon Wed Thu Fri Tue
## Levels: Mon Tue Wed Thu Fri Sat Sun
levels(review.factor)
## [1] "Bad"         "Good"        "Indifferent"
levels(review.factor) <- c("B", "G", "I")
levels(review.factor)
## [1] "B" "G" "I"
review.factor
## [1] G G I B G B
## Levels: B G I
nlevels(review.factor)
## [1] 3
length(levels(review.factor))
## [1] 3
eval <- c("Medium", "Low", "High", "Medium", "High")
eval.factor <- factor(eval)
eval.factor
## [1] Medium Low    High   Medium High  
## Levels: High Low Medium
eval.ordered <- factor(eval, levels = c("Low", "Medium", "High"),
                       ordered = TRUE)
eval.ordered
## [1] Medium Low    High   Medium High  
## Levels: Low < Medium < High
table(eval.factor)
## eval.factor
##   High    Low Medium 
##      2      1      2
table(eval.ordered)
## eval.ordered
##    Low Medium   High 
##      1      2      2
sex <- c(2, 1, 2, 2, 1, 0)
sex.factor <- factor(sex, levels = c(1, 2),
                     labels = c("Male", "Female"))
sex.factor
## [1] Female Male   Female Female Male   <NA>  
## Levels: Male Female
table(sex.factor)
## sex.factor
##   Male Female 
##      2      3

행렬, 배열

dim(), matrix()

v <- 1:12
v
##  [1]  1  2  3  4  5  6  7  8  9 10 11 12
dim(v) <- c(3, 4)
v
##      [,1] [,2] [,3] [,4]
## [1,]    1    4    7   10
## [2,]    2    5    8   11
## [3,]    3    6    9   12
# matrix() fills column by column unless byrow = TRUE; dimnames takes
# list(row names, column names).
v <- 1:12
matrix(data = v, nrow = 3, ncol = 4)
##      [,1] [,2] [,3] [,4]
## [1,]    1    4    7   10
## [2,]    2    5    8   11
## [3,]    3    6    9   12
matrix(data = v, nrow = 3, ncol = 4, byrow = TRUE)
##      [,1] [,2] [,3] [,4]
## [1,]    1    2    3    4
## [2,]    5    6    7    8
## [3,]    9   10   11   12
rnames <- c("R1", "R2", "R3")
# Named `cnames` rather than `colnames` so the base function colnames() is not
# masked by a character vector in the workspace.
cnames <- c("C1", "C2", "C3", "C4")
matrix(data = v, nrow = 3, ncol = 4,
       dimnames = list(rnames, cnames))
##    C1 C2 C3 C4
## R1  1  4  7 10
## R2  2  5  8 11
## R3  3  6  9 12
matrix(0, 3, 4)
##      [,1] [,2] [,3] [,4]
## [1,]    0    0    0    0
## [2,]    0    0    0    0
## [3,]    0    0    0    0
matrix(NA, 3, 4)
##      [,1] [,2] [,3] [,4]
## [1,]   NA   NA   NA   NA
## [2,]   NA   NA   NA   NA
## [3,]   NA   NA   NA   NA
mat <- matrix(v, ncol = 4)
mat
##      [,1] [,2] [,3] [,4]
## [1,]    1    4    7   10
## [2,]    2    5    8   11
## [3,]    3    6    9   12
str(mat)
##  int [1:3, 1:4] 1 2 3 4 5 6 7 8 9 10 ...
dim(mat)
## [1] 3 4
dim(mat)[1]
## [1] 3
dim(mat)[2]
## [1] 4
nrow(mat)
## [1] 3
ncol(mat)
## [1] 4
length(mat)
## [1] 12

rbind(), cbind()

v1 <- c(1, 2, 3, 4, 5)
v2 <- c(5, 7, 8, 9, 10)
rbind(v1, v2)
##    [,1] [,2] [,3] [,4] [,5]
## v1    1    2    3    4    5
## v2    5    7    8    9   10
cbind(v1, v2)
##      v1 v2
## [1,]  1  5
## [2,]  2  7
## [3,]  3  8
## [4,]  4  9
## [5,]  5 10
cbind(1:3, 4:6, matrix(7:12, 3, 2))
##      [,1] [,2] [,3] [,4]
## [1,]    1    4    7   10
## [2,]    2    5    8   11
## [3,]    3    6    9   12
rbind(matrix(1:6, 2, 3), matrix(7:12, 2, 3))
##      [,1] [,2] [,3]
## [1,]    1    3    5
## [2,]    2    4    6
## [3,]    7    9   11
## [4,]    8   10   12

연산

mtx <- matrix(1:6, 2, 3)
mtx
##      [,1] [,2] [,3]
## [1,]    1    3    5
## [2,]    2    4    6
mtx + 1
##      [,1] [,2] [,3]
## [1,]    2    4    6
## [2,]    3    5    7
mtx - 1
##      [,1] [,2] [,3]
## [1,]    0    2    4
## [2,]    1    3    5
mtx * 2
##      [,1] [,2] [,3]
## [1,]    2    6   10
## [2,]    4    8   12
mtx / 2
##      [,1] [,2] [,3]
## [1,]  0.5  1.5  2.5
## [2,]  1.0  2.0  3.0
a <- matrix(1:6, 2, 3)
b <- matrix(6:1, 2, 3)
a + b
##      [,1] [,2] [,3]
## [1,]    7    7    7
## [2,]    7    7    7
a - b
##      [,1] [,2] [,3]
## [1,]   -5   -1    3
## [2,]   -3    1    5
a * b
##      [,1] [,2] [,3]
## [1,]    6   12   10
## [2,]   10   12    6
a / b
##           [,1]     [,2] [,3]
## [1,] 0.1666667 0.750000  2.5
## [2,] 0.4000000 1.333333  6.0
c <- matrix(6:1, 3, 2)
c
##      [,1] [,2]
## [1,]    6    3
## [2,]    5    2
## [3,]    4    1
a
##      [,1] [,2] [,3]
## [1,]    1    3    5
## [2,]    2    4    6
# a + c
a * b
##      [,1] [,2] [,3]
## [1,]    6   12   10
## [2,]   10   12    6
a <- matrix(1:6, 2, 3)
a
##      [,1] [,2] [,3]
## [1,]    1    3    5
## [2,]    2    4    6
b <- matrix(1:6, 3, 2)
b
##      [,1] [,2]
## [1,]    1    4
## [2,]    2    5
## [3,]    3    6
a %*% b
##      [,1] [,2]
## [1,]   22   49
## [2,]   28   64
c <- matrix(1:6, 2, 3)

mtx <- matrix(1:6, 2, 3)
mtx
##      [,1] [,2] [,3]
## [1,]    1    3    5
## [2,]    2    4    6
mtx %*% 1:3
##      [,1]
## [1,]   22
## [2,]   28
#mtx %*% 1:2
1:2 %*% mtx
##      [,1] [,2] [,3]
## [1,]    5   11   17
mtx <- matrix(1:6, 2, 3)
mtx
##      [,1] [,2] [,3]
## [1,]    1    3    5
## [2,]    2    4    6
mtx + 1:3
##      [,1] [,2] [,3]
## [1,]    2    6    7
## [2,]    4    5    9

rowSums(), colSums(), rowMeans(), colMeans(), t()

mtx
##      [,1] [,2] [,3]
## [1,]    1    3    5
## [2,]    2    4    6
rowSums(mtx)
## [1]  9 12
colSums(mtx)
## [1]  3  7 11
rowMeans(mtx)
## [1] 3 4
colMeans(mtx)
## [1] 1.5 3.5 5.5
t(mtx)
##      [,1] [,2]
## [1,]    1    2
## [2,]    3    4
## [3,]    5    6
1:5
## [1] 1 2 3 4 5
t(1:5)
##      [,1] [,2] [,3] [,4] [,5]
## [1,]    1    2    3    4    5
mtx
##      [,1] [,2] [,3]
## [1,]    1    3    5
## [2,]    2    4    6
mtx[2,]
## [1] 2 4 6
t(mtx[2, ])
##      [,1] [,2] [,3]
## [1,]    2    4    6

인덱싱

v <- 1:12
mat <- matrix(v, 3, 4)
mat
##      [,1] [,2] [,3] [,4]
## [1,]    1    4    7   10
## [2,]    2    5    8   11
## [3,]    3    6    9   12
str(mat)
##  int [1:3, 1:4] 1 2 3 4 5 6 7 8 9 10 ...
mat[1,]
## [1]  1  4  7 10
mat[, 3]
## [1] 7 8 9
mat[1, , drop = FALSE]
##      [,1] [,2] [,3] [,4]
## [1,]    1    4    7   10
mat[, 3, drop = FALSE]
##      [,1]
## [1,]    7
## [2,]    8
## [3,]    9
mat[2:3,]
##      [,1] [,2] [,3] [,4]
## [1,]    2    5    8   11
## [2,]    3    6    9   12
mat[, 3:4]
##      [,1] [,2]
## [1,]    7   10
## [2,]    8   11
## [3,]    9   12
mat[1:2, 2:3]
##      [,1] [,2]
## [1,]    4    7
## [2,]    5    8
mat[c(1, 3),]
##      [,1] [,2] [,3] [,4]
## [1,]    1    4    7   10
## [2,]    3    6    9   12
mat[, -c(2, 3)]
##      [,1] [,2]
## [1,]    1   10
## [2,]    2   11
## [3,]    3   12
mat
##      [,1] [,2] [,3] [,4]
## [1,]    1    4    7   10
## [2,]    2    5    8   11
## [3,]    3    6    9   12
mat[1, 3] <- 77
mat
##      [,1] [,2] [,3] [,4]
## [1,]    1    4   77   10
## [2,]    2    5    8   11
## [3,]    3    6    9   12
mat[2,] <- c(22, 55)
mat
##      [,1] [,2] [,3] [,4]
## [1,]    1    4   77   10
## [2,]   22   55   22   55
## [3,]    3    6    9   12
mat[2:3, 3:4] <- c(1, 2, 3, 4)
mat
##      [,1] [,2] [,3] [,4]
## [1,]    1    4   77   10
## [2,]   22   55    1    3
## [3,]    3    6    2    4

colnames(), rownames()

city.distance <- c(0, 331, 238, 269, 195,
                   331, 0, 95, 194, 189,
                   238, 95, 0, 171, 130,
                   269, 194, 171, 0, 77,
                   195, 189, 130, 77, 0)
city.distance
##  [1]   0 331 238 269 195 331   0  95 194 189 238  95   0 171 130 269 194 171   0
## [20]  77 195 189 130  77   0
city.distance.mat <- matrix(city.distance, 5, 5, byrow = TRUE)
city.distance.mat
##      [,1] [,2] [,3] [,4] [,5]
## [1,]    0  331  238  269  195
## [2,]  331    0   95  194  189
## [3,]  238   95    0  171  130
## [4,]  269  194  171    0   77
## [5,]  195  189  130   77    0
colnames(city.distance.mat) <- c("Seoul", "Busan", "Daegu",
                                 "Gwangju", "Jeonju")
rownames(city.distance.mat) <- c("Seoul", "Busan", "Daegu",
                                 "Gwangju", "Jeonju")
colnames(city.distance.mat)
## [1] "Seoul"   "Busan"   "Daegu"   "Gwangju" "Jeonju"
rownames(city.distance.mat)
## [1] "Seoul"   "Busan"   "Daegu"   "Gwangju" "Jeonju"
city.distance.mat
##         Seoul Busan Daegu Gwangju Jeonju
## Seoul       0   331   238     269    195
## Busan     331     0    95     194    189
## Daegu     238    95     0     171    130
## Gwangju   269   194   171       0     77
## Jeonju    195   189   130      77      0
city.distance.mat["Seoul", "Busan"]
## [1] 331
city.distance.mat[, "Seoul"]
##   Seoul   Busan   Daegu Gwangju  Jeonju 
##       0     331     238     269     195
city.distance.mat[c("Seoul", "Gwangju"),]
##         Seoul Busan Daegu Gwangju Jeonju
## Seoul       0   331   238     269    195
## Gwangju   269   194   171       0     77

배열

a <- 1:24
a
##  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
dim(a) <- c(3, 4, 2)
a
## , , 1
## 
##      [,1] [,2] [,3] [,4]
## [1,]    1    4    7   10
## [2,]    2    5    8   11
## [3,]    3    6    9   12
## 
## , , 2
## 
##      [,1] [,2] [,3] [,4]
## [1,]   13   16   19   22
## [2,]   14   17   20   23
## [3,]   15   18   21   24
array(1:12, c(2, 3, 2))
## , , 1
## 
##      [,1] [,2] [,3]
## [1,]    1    3    5
## [2,]    2    4    6
## 
## , , 2
## 
##      [,1] [,2] [,3]
## [1,]    7    9   11
## [2,]    8   10   12
ary <- array(1:12, c(2, 3, 2))
ary
## , , 1
## 
##      [,1] [,2] [,3]
## [1,]    1    3    5
## [2,]    2    4    6
## 
## , , 2
## 
##      [,1] [,2] [,3]
## [1,]    7    9   11
## [2,]    8   10   12
ary[1, 3, 2]
## [1] 11
ary[, 1, 2]
## [1] 7 8
ary[, 1, 2, drop = FALSE]
## , , 1
## 
##      [,1]
## [1,]    7
## [2,]    8
ary[2, ,]
##      [,1] [,2]
## [1,]    2    8
## [2,]    4   10
## [3,]    6   12

리스트

list(), names(), length(), unlist()

list(0.6826, 0.9544, 0.9974)
## [[1]]
## [1] 0.6826
## 
## [[2]]
## [1] 0.9544
## 
## [[3]]
## [1] 0.9974
list(1.23,
     "Apple",
     c(2, 3, 5, 7),
     matrix(1:6, ncol = 3),
     mean)
## [[1]]
## [1] 1.23
## 
## [[2]]
## [1] "Apple"
## 
## [[3]]
## [1] 2 3 5 7
## 
## [[4]]
##      [,1] [,2] [,3]
## [1,]    1    3    5
## [2,]    2    4    6
## 
## [[5]]
## function (x, ...) 
## UseMethod("mean")
## <bytecode: 0x0000000017ebee00>
## <environment: namespace:base>
lst <- list()
lst
## list()
lst[[1]] <- 1.23
lst[[2]] <- "Apple"
lst[[3]] <- c(2, 3, 5, 7)
lst[[4]] <- matrix(1:6, ncol = 3)
lst[[5]] <- mean
lst
## [[1]]
## [1] 1.23
## 
## [[2]]
## [1] "Apple"
## 
## [[3]]
## [1] 2 3 5 7
## 
## [[4]]
##      [,1] [,2] [,3]
## [1,]    1    3    5
## [2,]    2    4    6
## 
## [[5]]
## function (x, ...) 
## UseMethod("mean")
## <bytecode: 0x0000000017ebee00>
## <environment: namespace:base>
lst <- list(0.6826, 0.9544, 0.9974)
lst
## [[1]]
## [1] 0.6826
## 
## [[2]]
## [1] 0.9544
## 
## [[3]]
## [1] 0.9974
names(lst) <- c("sigma1", "sigma2", "sigma3")
lst
## $sigma1
## [1] 0.6826
## 
## $sigma2
## [1] 0.9544
## 
## $sigma3
## [1] 0.9974
names(lst)
## [1] "sigma1" "sigma2" "sigma3"
length(lst)
## [1] 3
# c() on two lists concatenates them into a single flat list.
worldcup1 <- list("Brazil", "South Africa", "Germany")
worldcup2 <- list("Korea-Japan", "France", "USA")
c(worldcup1, worldcup2)
## [[1]]
## [1] "Brazil"
## 
## [[2]]
## [1] "South Africa"
## 
## [[3]]
## [1] "Germany"
## 
## [[4]]
## [1] "Korea-Japan"
## 
## [[5]]
## [1] "France"
## 
## [[6]]
## [1] "USA"
a <- list(1, 2, 3, 4, 5, 6, 7)
mean(a)
## Warning in mean.default(a): argument is not numeric or logical: returning NA
## [1] NA
mean(unlist(a))
## [1] 4
min(unlist(a))
## [1] 1
max(unlist(a))
## [1] 7

인덱싱

product <- list("A002", "Mouse", 30000)
product
## [[1]]
## [1] "A002"
## 
## [[2]]
## [1] "Mouse"
## 
## [[3]]
## [1] 30000
product[[3]]
## [1] 30000
product[[2]]
## [1] "Mouse"
product[3]
## [[1]]
## [1] 30000
class(product[[3]])
## [1] "numeric"
class(product[3])
## [1] "list"
# product[3] * 0.9 # Error
product[[3]] * 0.9
## [1] 27000
product[c(1, 2)]
## [[1]]
## [1] "A002"
## 
## [[2]]
## [1] "Mouse"
product[c(FALSE, TRUE, TRUE)]
## [[1]]
## [1] "Mouse"
## 
## [[2]]
## [1] 30000
product[-1]
## [[1]]
## [1] "Mouse"
## 
## [[2]]
## [1] 30000
product <- list(id = "A002",
                name = "Mouse",
                price = 30000)
product
## $id
## [1] "A002"
## 
## $name
## [1] "Mouse"
## 
## $price
## [1] 30000
product[["name"]]
## [1] "Mouse"
product$name
## [1] "Mouse"
product[c("name", "price")]
## $name
## [1] "Mouse"
## 
## $price
## [1] 30000
product[["fourth"]]
## NULL
product$fourth
## NULL
# product[[4]] # Error

product[c(4, 2, 5)]
## $<NA>
## NULL
## 
## $name
## [1] "Mouse"
## 
## $<NA>
## NULL
product[c("fourth", "name", "fifth")]
## $<NA>
## NULL
## 
## $name
## [1] "Mouse"
## 
## $<NA>
## NULL
lst <- list(one = 1,
            two = 2,
            three = list(alpha = 3.1, beta = 3.3))
lst
## $one
## [1] 1
## 
## $two
## [1] 2
## 
## $three
## $three$alpha
## [1] 3.1
## 
## $three$beta
## [1] 3.3
lst[["three"]]
## $alpha
## [1] 3.1
## 
## $beta
## [1] 3.3
lst[["three"]][["beta"]]
## [1] 3.3
lst$three$beta
## [1] 3.3
product <- list(id = "A001",
                name = "Mouse",
                price = 30000)
product
## $id
## [1] "A001"
## 
## $name
## [1] "Mouse"
## 
## $price
## [1] 30000
product[[3]] <- 40000
product
## $id
## [1] "A001"
## 
## $name
## [1] "Mouse"
## 
## $price
## [1] 40000
product[["price"]] <- 40000
product
## $id
## [1] "A001"
## 
## $name
## [1] "Mouse"
## 
## $price
## [1] 40000
product$price <- 40000
product
## $id
## [1] "A001"
## 
## $name
## [1] "Mouse"
## 
## $price
## [1] 40000
product[3] <- 40000
product["price"] <- 40000
product
## $id
## [1] "A001"
## 
## $name
## [1] "Mouse"
## 
## $price
## [1] 40000
product[[3]] <- c(30000, 40000)
product
## $id
## [1] "A001"
## 
## $name
## [1] "Mouse"
## 
## $price
## [1] 30000 40000
product[3] <- list(c(30000, 40000))
product
## $id
## [1] "A001"
## 
## $name
## [1] "Mouse"
## 
## $price
## [1] 30000 40000
product[1:3] <- list("A002", "Keyboard", 90000)
product
## $id
## [1] "A002"
## 
## $name
## [1] "Keyboard"
## 
## $price
## [1] 90000
product[[4]] <- c("Domestic", "Export")
product
## $id
## [1] "A002"
## 
## $name
## [1] "Keyboard"
## 
## $price
## [1] 90000
## 
## [[4]]
## [1] "Domestic" "Export"
product$madein <- c("Korea", "China")
product
## $id
## [1] "A002"
## 
## $name
## [1] "Keyboard"
## 
## $price
## [1] 90000
## 
## [[4]]
## [1] "Domestic" "Export"  
## 
## $madein
## [1] "Korea" "China"
product[["madein"]] <- c("Korea", "China")
product
## $id
## [1] "A002"
## 
## $name
## [1] "Keyboard"
## 
## $price
## [1] 90000
## 
## [[4]]
## [1] "Domestic" "Export"  
## 
## $madein
## [1] "Korea" "China"
product["madein"] <- list(c("Korea", "China"))
product
## $id
## [1] "A002"
## 
## $name
## [1] "Keyboard"
## 
## $price
## [1] 90000
## 
## [[4]]
## [1] "Domestic" "Export"  
## 
## $madein
## [1] "Korea" "China"
product[6:9] <- list(0.12, 0.15, 0.22, 0.27)
product
## $id
## [1] "A002"
## 
## $name
## [1] "Keyboard"
## 
## $price
## [1] 90000
## 
## [[4]]
## [1] "Domestic" "Export"  
## 
## $madein
## [1] "Korea" "China"
## 
## [[6]]
## [1] 0.12
## 
## [[7]]
## [1] 0.15
## 
## [[8]]
## [1] 0.22
## 
## [[9]]
## [1] 0.27
names <- c("Mon", "Tue", "Wed", "Thur", "Fri", "Sat", "Sun")
values <- c(842, 729, 786, 751, 844, 851, 702)

traffic.death <- list()
traffic.death
## list()
traffic.death[names] <- values
traffic.death
## $Mon
## [1] 842
## 
## $Tue
## [1] 729
## 
## $Wed
## [1] 786
## 
## $Thur
## [1] 751
## 
## $Fri
## [1] 844
## 
## $Sat
## [1] 851
## 
## $Sun
## [1] 702
traffic.death[["Fri"]] <- NULL
traffic.death
## $Mon
## [1] 842
## 
## $Tue
## [1] 729
## 
## $Wed
## [1] 786
## 
## $Thur
## [1] 751
## 
## $Sat
## [1] 851
## 
## $Sun
## [1] 702
traffic.death[c("Sat", "Sun")] <- NULL
traffic.death
## $Mon
## [1] 842
## 
## $Tue
## [1] 729
## 
## $Wed
## [1] 786
## 
## $Thur
## [1] 751
traffic.death < 750
##   Mon   Tue   Wed  Thur 
## FALSE  TRUE FALSE FALSE
traffic.death[traffic.death < 750] <- NULL
traffic.death
## $Mon
## [1] 842
## 
## $Wed
## [1] 786
## 
## $Thur
## [1] 751

데이터프레임

data.frame()

v1 <- c("A001", "A002", "A003")
v2 <- c("Mouse", "Keyboard", "USB")
v3 <- c(30000, 90000, 50000)
data.frame(v1, v2, v3)
##     v1       v2    v3
## 1 A001    Mouse 30000
## 2 A002 Keyboard 90000
## 3 A003      USB 50000
data.frame(row.names = v1, v2, v3)
##            v2    v3
## A001    Mouse 30000
## A002 Keyboard 90000
## A003      USB 50000
product <- data.frame(id = v1, name = v2,
                      price = v3)
str(product)
## 'data.frame':    3 obs. of  3 variables:
##  $ id   : chr  "A001" "A002" "A003"
##  $ name : chr  "Mouse" "Keyboard" "USB"
##  $ price: num  30000 90000 50000
product <- data.frame(id = v1, name = v2,
                      price = v3, stringsAsFactors = TRUE)
str(product)
## 'data.frame':    3 obs. of  3 variables:
##  $ id   : Factor w/ 3 levels "A001","A002",..: 1 2 3
##  $ name : Factor w/ 3 levels "Keyboard","Mouse",..: 2 1 3
##  $ price: num  30000 90000 50000
mat <- matrix(c(1, 3, 5, 7, 9,
                2, 4, 6, 8, 10,
                2, 3, 5, 7, 11), ncol = 3)
number <- as.data.frame(mat)
colnames(number) <- c("odd", "even", "prime")
number
##   odd even prime
## 1   1    2     2
## 2   3    4     3
## 3   5    6     5
## 4   7    8     7
## 5   9   10    11
# Build a data frame from an unnamed list of equal-length vectors, then give
# the columns meaningful names.
v1 <- c("A001", "A002", "A003")
v2 <- c("Mouse", "Keyboard", "USB")
v3 <- c(30000, 90000, 50000)
lst <- list(v1, v2, v3)
product <- as.data.frame(lst)
# Column names describe the product data (the odd/even/prime labels belong to
# the numeric matrix example, not to this data frame).
colnames(product) <- c("id", "name", "price")
product
##     id     name price
## 1 A001    Mouse 30000
## 2 A002 Keyboard 90000
## 3 A003      USB 50000
nrow(product)
## [1] 3
ncol(product)
## [1] 3
# length() of a data frame is its number of columns.
length(product)
## [1] 3

rbind(), cbind()

# Create the product table and append one row with rbind().
v1 <- c("A001", "A002", "A003")
v2 <- c("Mouse", "Keyboard", "USB")
v3 <- c(30000, 90000, 50000)
product <- data.frame(id = v1, name = v2,
                      price = v3)
product
##     id     name price
## 1 A001    Mouse 30000
## 2 A002 Keyboard 90000
## 3 A003      USB 50000
# Append the new row as a one-row data frame. A plain
# c("A004", "Monitor", 250000) is a character vector, and binding it would
# silently turn the numeric `price` column into character.
product <- rbind(product,
                 data.frame(id = "A004", name = "Monitor", price = 250000))
product
##     id     name  price
## 1 A001    Mouse  30000
## 2 A002 Keyboard  90000
## 3 A003      USB  50000
## 4 A004  Monitor 250000
new.rows <- data.frame(id = c("A005", "A006"),
                       name = c("Memory", "CPU"),
                       price = c(35000, 320000))
product <- rbind(product, new.rows)
product
##     id     name  price
## 1 A001    Mouse  30000
## 2 A002 Keyboard  90000
## 3 A003      USB  50000
## 4 A004  Monitor 250000
## 5 A005   Memory  35000
## 6 A006      CPU 320000
product <- cbind(product,
                 madein = c("Korea", "China", "China", "Korea",
                            "Korea", "USA"))
product
##     id     name  price madein
## 1 A001    Mouse  30000  Korea
## 2 A002 Keyboard  90000  China
## 3 A003      USB  50000  China
## 4 A004  Monitor 250000  Korea
## 5 A005   Memory  35000  Korea
## 6 A006      CPU 320000    USA
# Assigning to an existing column name with `$<-` overwrites that column;
# assigning to a new name appends a column.
product$madein <- c("Korea", "China", "China", "Korea",
                    "Korea", "USA")
product
##     id     name  price madein
## 1 A001    Mouse  30000  Korea
## 2 A002 Keyboard  90000  China
## 3 A003      USB  50000  China
## 4 A004  Monitor 250000  Korea
## 5 A005   Memory  35000  Korea
## 6 A006      CPU 320000    USA
product$madeina <- c("Korea", "China", "China", "Korea",
                     "Korea", "USA")
product
##     id     name  price madein madeina
## 1 A001    Mouse  30000  Korea   Korea
## 2 A002 Keyboard  90000  China   China
## 3 A003      USB  50000  China   China
## 4 A004  Monitor 250000  Korea   Korea
## 5 A005   Memory  35000  Korea   Korea
## 6 A006      CPU 320000    USA     USA
new.cols <- data.frame(manufacturer = c("Logitech",
                                        "Logitech",
                                        "Samsung",
                                        "Samsung",
                                        "Samsung",
                                        "Intel"),
                       quantity = c(20, 15, 50, 30, 40, 10))
new.cols
##   manufacturer quantity
## 1     Logitech       20
## 2     Logitech       15
## 3      Samsung       50
## 4      Samsung       30
## 5      Samsung       40
## 6        Intel       10
product <- cbind(product, new.cols)
product
##     id     name  price madein madeina manufacturer quantity
## 1 A001    Mouse  30000  Korea   Korea     Logitech       20
## 2 A002 Keyboard  90000  China   China     Logitech       15
## 3 A003      USB  50000  China   China      Samsung       50
## 4 A004  Monitor 250000  Korea   Korea      Samsung       30
## 5 A005   Memory  35000  Korea   Korea      Samsung       40
## 6 A006      CPU 320000    USA     USA        Intel       10
# Two small data frames that deliberately share the column names x and y.
cols1 <- data.frame(
  x = letters[1:3],
  y = c(1, 2, 3)
)
cols2 <- data.frame(
  x = c("alpha", "beta", "gamma"),
  y = c(100, 200, 300)
)
# cbind() keeps every column as-is, so the result has duplicated names.
cbind(cols1, cols2)
##   x y     x   y
## 1 a 1 alpha 100
## 2 b 2  beta 200
## 3 c 3 gamma 300

do.call()

# Five one-row data frames that all share the columns sex, months, weight.
df1 <- data.frame(sex = "female", months = 1, weight = 3.5)
df2 <- data.frame(sex = "male", months = 3, weight = 4.8)
df3 <- data.frame(sex = "male", months = 4, weight = 5.3)
df4 <- data.frame(sex = "female", months = 9, weight = 9.4)
df5 <- data.frame(sex = "female", months = 7, weight = 8.3)
# Collect them in an unnamed list so they can be combined in a single call.
lst <- list(df1, df2, df3, df4, df5)

lst[[1]]
##      sex months weight
## 1 female      1    3.5
lst[[2]]
##    sex months weight
## 1 male      3    4.8
rbind(lst[[1]], lst[[2]])
##      sex months weight
## 1 female      1    3.5
## 2   male      3    4.8
# do.call() passes every element of lst as a separate argument, i.e. this is
# rbind(lst[[1]], lst[[2]], ..., lst[[5]]): all rows are stacked in one call.
do.call(rbind, lst)
##      sex months weight
## 1 female      1    3.5
## 2   male      3    4.8
## 3   male      4    5.3
## 4 female      9    9.4
## 5 female      7    8.3
# Same records as before, but each one is a plain named list rather than a
# data frame.
lst1 <- list(sex = "female", months = 1, weight = 3.5)
lst2 <- list(sex = "male", months = 3, weight = 4.8)
lst3 <- list(sex = "male", months = 4, weight = 5.3)
lst4 <- list(sex = "female", months = 9, weight = 9.4)
lst5 <- list(sex = "female", months = 7, weight = 8.3)
# lst is reassigned here: it now holds a list of lists.
lst <- list(lst1, lst2, lst3, lst4, lst5)

lst[[1]]
## $sex
## [1] "female"
## 
## $months
## [1] 1
## 
## $weight
## [1] 3.5
as.data.frame(lst[[1]])
##      sex months weight
## 1 female      1    3.5
lapply(lst, as.data.frame)
## [[1]]
##      sex months weight
## 1 female      1    3.5
## 
## [[2]]
##    sex months weight
## 1 male      3    4.8
## 
## [[3]]
##    sex months weight
## 1 male      4    5.3
## 
## [[4]]
##      sex months weight
## 1 female      9    9.4
## 
## [[5]]
##      sex months weight
## 1 female      7    8.3
# Two steps in one expression: lapply() turns each inner list into a one-row
# data frame, then do.call(rbind, ...) stacks those rows into one data frame.
do.call(rbind, lapply(lst, as.data.frame))
##      sex months weight
## 1 female      1    3.5
## 2   male      3    4.8
## 3   male      4    5.3
## 4 female      9    9.4
## 5 female      7    8.3

인덱싱

# Combine the built-in state vectors into one data frame; the column names
# are taken from the vector names. stringsAsFactors = FALSE keeps the
# character vectors as character (state.region is already a factor).
us.state <- data.frame(
  state.abb, state.name, state.region, state.area,
  stringsAsFactors = FALSE
)
us.state
##    state.abb     state.name  state.region state.area
## 1         AL        Alabama         South      51609
## 2         AK         Alaska          West     589757
## 3         AZ        Arizona          West     113909
## 4         AR       Arkansas         South      53104
## 5         CA     California          West     158693
## 6         CO       Colorado          West     104247
## 7         CT    Connecticut     Northeast       5009
## 8         DE       Delaware         South       2057
## 9         FL        Florida         South      58560
## 10        GA        Georgia         South      58876
## 11        HI         Hawaii          West       6450
## 12        ID          Idaho          West      83557
## 13        IL       Illinois North Central      56400
## 14        IN        Indiana North Central      36291
## 15        IA           Iowa North Central      56290
## 16        KS         Kansas North Central      82264
## 17        KY       Kentucky         South      40395
## 18        LA      Louisiana         South      48523
## 19        ME          Maine     Northeast      33215
## 20        MD       Maryland         South      10577
## 21        MA  Massachusetts     Northeast       8257
## 22        MI       Michigan North Central      58216
## 23        MN      Minnesota North Central      84068
## 24        MS    Mississippi         South      47716
## 25        MO       Missouri North Central      69686
## 26        MT        Montana          West     147138
## 27        NE       Nebraska North Central      77227
## 28        NV         Nevada          West     110540
## 29        NH  New Hampshire     Northeast       9304
## 30        NJ     New Jersey     Northeast       7836
## 31        NM     New Mexico          West     121666
## 32        NY       New York     Northeast      49576
## 33        NC North Carolina         South      52586
## 34        ND   North Dakota North Central      70665
## 35        OH           Ohio North Central      41222
## 36        OK       Oklahoma         South      69919
## 37        OR         Oregon          West      96981
## 38        PA   Pennsylvania     Northeast      45333
## 39        RI   Rhode Island     Northeast       1214
## 40        SC South Carolina         South      31055
## 41        SD   South Dakota North Central      77047
## 42        TN      Tennessee         South      42244
## 43        TX          Texas         South     267339
## 44        UT           Utah          West      84916
## 45        VT        Vermont     Northeast       9609
## 46        VA       Virginia         South      40815
## 47        WA     Washington          West      68192
## 48        WV  West Virginia         South      24181
## 49        WI      Wisconsin North Central      56154
## 50        WY        Wyoming          West      97914
str(us.state)
## 'data.frame':    50 obs. of  4 variables:
##  $ state.abb   : chr  "AL" "AK" "AZ" "AR" ...
##  $ state.name  : chr  "Alabama" "Alaska" "Arizona" "Arkansas" ...
##  $ state.region: Factor w/ 4 levels "Northeast","South",..: 2 4 4 2 4 4 1 2 2 2 ...
##  $ state.area  : num  51609 589757 113909 53104 158693 ...
us.state[[2]]
##  [1] "Alabama"        "Alaska"         "Arizona"        "Arkansas"      
##  [5] "California"     "Colorado"       "Connecticut"    "Delaware"      
##  [9] "Florida"        "Georgia"        "Hawaii"         "Idaho"         
## [13] "Illinois"       "Indiana"        "Iowa"           "Kansas"        
## [17] "Kentucky"       "Louisiana"      "Maine"          "Maryland"      
## [21] "Massachusetts"  "Michigan"       "Minnesota"      "Mississippi"   
## [25] "Missouri"       "Montana"        "Nebraska"       "Nevada"        
## [29] "New Hampshire"  "New Jersey"     "New Mexico"     "New York"      
## [33] "North Carolina" "North Dakota"   "Ohio"           "Oklahoma"      
## [37] "Oregon"         "Pennsylvania"   "Rhode Island"   "South Carolina"
## [41] "South Dakota"   "Tennessee"      "Texas"          "Utah"          
## [45] "Vermont"        "Virginia"       "Washington"     "West Virginia" 
## [49] "Wisconsin"      "Wyoming"
str(us.state[[2]])
##  chr [1:50] "Alabama" "Alaska" "Arizona" "Arkansas" "California" "Colorado" ...
us.state[2]
##        state.name
## 1         Alabama
## 2          Alaska
## 3         Arizona
## 4        Arkansas
## 5      California
## 6        Colorado
## 7     Connecticut
## 8        Delaware
## 9         Florida
## 10        Georgia
## 11         Hawaii
## 12          Idaho
## 13       Illinois
## 14        Indiana
## 15           Iowa
## 16         Kansas
## 17       Kentucky
## 18      Louisiana
## 19          Maine
## 20       Maryland
## 21  Massachusetts
## 22       Michigan
## 23      Minnesota
## 24    Mississippi
## 25       Missouri
## 26        Montana
## 27       Nebraska
## 28         Nevada
## 29  New Hampshire
## 30     New Jersey
## 31     New Mexico
## 32       New York
## 33 North Carolina
## 34   North Dakota
## 35           Ohio
## 36       Oklahoma
## 37         Oregon
## 38   Pennsylvania
## 39   Rhode Island
## 40 South Carolina
## 41   South Dakota
## 42      Tennessee
## 43          Texas
## 44           Utah
## 45        Vermont
## 46       Virginia
## 47     Washington
## 48  West Virginia
## 49      Wisconsin
## 50        Wyoming
str(us.state[2])
## 'data.frame':    50 obs. of  1 variable:
##  $ state.name: chr  "Alabama" "Alaska" "Arizona" "Arkansas" ...
us.state[c(2, 4)] # list-style indexing: picks columns 2 and 4; the result is still a data frame
##        state.name state.area
## 1         Alabama      51609
## 2          Alaska     589757
## 3         Arizona     113909
## 4        Arkansas      53104
## 5      California     158693
## 6        Colorado     104247
## 7     Connecticut       5009
## 8        Delaware       2057
## 9         Florida      58560
## 10        Georgia      58876
## 11         Hawaii       6450
## 12          Idaho      83557
## 13       Illinois      56400
## 14        Indiana      36291
## 15           Iowa      56290
## 16         Kansas      82264
## 17       Kentucky      40395
## 18      Louisiana      48523
## 19          Maine      33215
## 20       Maryland      10577
## 21  Massachusetts       8257
## 22       Michigan      58216
## 23      Minnesota      84068
## 24    Mississippi      47716
## 25       Missouri      69686
## 26        Montana     147138
## 27       Nebraska      77227
## 28         Nevada     110540
## 29  New Hampshire       9304
## 30     New Jersey       7836
## 31     New Mexico     121666
## 32       New York      49576
## 33 North Carolina      52586
## 34   North Dakota      70665
## 35           Ohio      41222
## 36       Oklahoma      69919
## 37         Oregon      96981
## 38   Pennsylvania      45333
## 39   Rhode Island       1214
## 40 South Carolina      31055
## 41   South Dakota      77047
## 42      Tennessee      42244
## 43          Texas     267339
## 44           Utah      84916
## 45        Vermont       9609
## 46       Virginia      40815
## 47     Washington      68192
## 48  West Virginia      24181
## 49      Wisconsin      56154
## 50        Wyoming      97914
us.state[, 2] # matrix-style indexing: a single column is dropped to a plain vector
##  [1] "Alabama"        "Alaska"         "Arizona"        "Arkansas"      
##  [5] "California"     "Colorado"       "Connecticut"    "Delaware"      
##  [9] "Florida"        "Georgia"        "Hawaii"         "Idaho"         
## [13] "Illinois"       "Indiana"        "Iowa"           "Kansas"        
## [17] "Kentucky"       "Louisiana"      "Maine"          "Maryland"      
## [21] "Massachusetts"  "Michigan"       "Minnesota"      "Mississippi"   
## [25] "Missouri"       "Montana"        "Nebraska"       "Nevada"        
## [29] "New Hampshire"  "New Jersey"     "New Mexico"     "New York"      
## [33] "North Carolina" "North Dakota"   "Ohio"           "Oklahoma"      
## [37] "Oregon"         "Pennsylvania"   "Rhode Island"   "South Carolina"
## [41] "South Dakota"   "Tennessee"      "Texas"          "Utah"          
## [45] "Vermont"        "Virginia"       "Washington"     "West Virginia" 
## [49] "Wisconsin"      "Wyoming"
us.state[, 2, drop = FALSE]
##        state.name
## 1         Alabama
## 2          Alaska
## 3         Arizona
## 4        Arkansas
## 5      California
## 6        Colorado
## 7     Connecticut
## 8        Delaware
## 9         Florida
## 10        Georgia
## 11         Hawaii
## 12          Idaho
## 13       Illinois
## 14        Indiana
## 15           Iowa
## 16         Kansas
## 17       Kentucky
## 18      Louisiana
## 19          Maine
## 20       Maryland
## 21  Massachusetts
## 22       Michigan
## 23      Minnesota
## 24    Mississippi
## 25       Missouri
## 26        Montana
## 27       Nebraska
## 28         Nevada
## 29  New Hampshire
## 30     New Jersey
## 31     New Mexico
## 32       New York
## 33 North Carolina
## 34   North Dakota
## 35           Ohio
## 36       Oklahoma
## 37         Oregon
## 38   Pennsylvania
## 39   Rhode Island
## 40 South Carolina
## 41   South Dakota
## 42      Tennessee
## 43          Texas
## 44           Utah
## 45        Vermont
## 46       Virginia
## 47     Washington
## 48  West Virginia
## 49      Wisconsin
## 50        Wyoming
us.state[, c(2, 4)]
##        state.name state.area
## 1         Alabama      51609
## 2          Alaska     589757
## 3         Arizona     113909
## 4        Arkansas      53104
## 5      California     158693
## 6        Colorado     104247
## 7     Connecticut       5009
## 8        Delaware       2057
## 9         Florida      58560
## 10        Georgia      58876
## 11         Hawaii       6450
## 12          Idaho      83557
## 13       Illinois      56400
## 14        Indiana      36291
## 15           Iowa      56290
## 16         Kansas      82264
## 17       Kentucky      40395
## 18      Louisiana      48523
## 19          Maine      33215
## 20       Maryland      10577
## 21  Massachusetts       8257
## 22       Michigan      58216
## 23      Minnesota      84068
## 24    Mississippi      47716
## 25       Missouri      69686
## 26        Montana     147138
## 27       Nebraska      77227
## 28         Nevada     110540
## 29  New Hampshire       9304
## 30     New Jersey       7836
## 31     New Mexico     121666
## 32       New York      49576
## 33 North Carolina      52586
## 34   North Dakota      70665
## 35           Ohio      41222
## 36       Oklahoma      69919
## 37         Oregon      96981
## 38   Pennsylvania      45333
## 39   Rhode Island       1214
## 40 South Carolina      31055
## 41   South Dakota      77047
## 42      Tennessee      42244
## 43          Texas     267339
## 44           Utah      84916
## 45        Vermont       9609
## 46       Virginia      40815
## 47     Washington      68192
## 48  West Virginia      24181
## 49      Wisconsin      56154
## 50        Wyoming      97914
us.state[["state.name"]]
##  [1] "Alabama"        "Alaska"         "Arizona"        "Arkansas"      
##  [5] "California"     "Colorado"       "Connecticut"    "Delaware"      
##  [9] "Florida"        "Georgia"        "Hawaii"         "Idaho"         
## [13] "Illinois"       "Indiana"        "Iowa"           "Kansas"        
## [17] "Kentucky"       "Louisiana"      "Maine"          "Maryland"      
## [21] "Massachusetts"  "Michigan"       "Minnesota"      "Mississippi"   
## [25] "Missouri"       "Montana"        "Nebraska"       "Nevada"        
## [29] "New Hampshire"  "New Jersey"     "New Mexico"     "New York"      
## [33] "North Carolina" "North Dakota"   "Ohio"           "Oklahoma"      
## [37] "Oregon"         "Pennsylvania"   "Rhode Island"   "South Carolina"
## [41] "South Dakota"   "Tennessee"      "Texas"          "Utah"          
## [45] "Vermont"        "Virginia"       "Washington"     "West Virginia" 
## [49] "Wisconsin"      "Wyoming"
us.state$state.name
##  [1] "Alabama"        "Alaska"         "Arizona"        "Arkansas"      
##  [5] "California"     "Colorado"       "Connecticut"    "Delaware"      
##  [9] "Florida"        "Georgia"        "Hawaii"         "Idaho"         
## [13] "Illinois"       "Indiana"        "Iowa"           "Kansas"        
## [17] "Kentucky"       "Louisiana"      "Maine"          "Maryland"      
## [21] "Massachusetts"  "Michigan"       "Minnesota"      "Mississippi"   
## [25] "Missouri"       "Montana"        "Nebraska"       "Nevada"        
## [29] "New Hampshire"  "New Jersey"     "New Mexico"     "New York"      
## [33] "North Carolina" "North Dakota"   "Ohio"           "Oklahoma"      
## [37] "Oregon"         "Pennsylvania"   "Rhode Island"   "South Carolina"
## [41] "South Dakota"   "Tennessee"      "Texas"          "Utah"          
## [45] "Vermont"        "Virginia"       "Washington"     "West Virginia" 
## [49] "Wisconsin"      "Wyoming"
us.state[, "state.name"]
##  [1] "Alabama"        "Alaska"         "Arizona"        "Arkansas"      
##  [5] "California"     "Colorado"       "Connecticut"    "Delaware"      
##  [9] "Florida"        "Georgia"        "Hawaii"         "Idaho"         
## [13] "Illinois"       "Indiana"        "Iowa"           "Kansas"        
## [17] "Kentucky"       "Louisiana"      "Maine"          "Maryland"      
## [21] "Massachusetts"  "Michigan"       "Minnesota"      "Mississippi"   
## [25] "Missouri"       "Montana"        "Nebraska"       "Nevada"        
## [29] "New Hampshire"  "New Jersey"     "New Mexico"     "New York"      
## [33] "North Carolina" "North Dakota"   "Ohio"           "Oklahoma"      
## [37] "Oregon"         "Pennsylvania"   "Rhode Island"   "South Carolina"
## [41] "South Dakota"   "Tennessee"      "Texas"          "Utah"          
## [45] "Vermont"        "Virginia"       "Washington"     "West Virginia" 
## [49] "Wisconsin"      "Wyoming"
us.state[c("state.name", "state.area")]
##        state.name state.area
## 1         Alabama      51609
## 2          Alaska     589757
## 3         Arizona     113909
## 4        Arkansas      53104
## 5      California     158693
## 6        Colorado     104247
## 7     Connecticut       5009
## 8        Delaware       2057
## 9         Florida      58560
## 10        Georgia      58876
## 11         Hawaii       6450
## 12          Idaho      83557
## 13       Illinois      56400
## 14        Indiana      36291
## 15           Iowa      56290
## 16         Kansas      82264
## 17       Kentucky      40395
## 18      Louisiana      48523
## 19          Maine      33215
## 20       Maryland      10577
## 21  Massachusetts       8257
## 22       Michigan      58216
## 23      Minnesota      84068
## 24    Mississippi      47716
## 25       Missouri      69686
## 26        Montana     147138
## 27       Nebraska      77227
## 28         Nevada     110540
## 29  New Hampshire       9304
## 30     New Jersey       7836
## 31     New Mexico     121666
## 32       New York      49576
## 33 North Carolina      52586
## 34   North Dakota      70665
## 35           Ohio      41222
## 36       Oklahoma      69919
## 37         Oregon      96981
## 38   Pennsylvania      45333
## 39   Rhode Island       1214
## 40 South Carolina      31055
## 41   South Dakota      77047
## 42      Tennessee      42244
## 43          Texas     267339
## 44           Utah      84916
## 45        Vermont       9609
## 46       Virginia      40815
## 47     Washington      68192
## 48  West Virginia      24181
## 49      Wisconsin      56154
## 50        Wyoming      97914
us.state[, c("state.name", "state.area")]
##        state.name state.area
## 1         Alabama      51609
## 2          Alaska     589757
## 3         Arizona     113909
## 4        Arkansas      53104
## 5      California     158693
## 6        Colorado     104247
## 7     Connecticut       5009
## 8        Delaware       2057
## 9         Florida      58560
## 10        Georgia      58876
## 11         Hawaii       6450
## 12          Idaho      83557
## 13       Illinois      56400
## 14        Indiana      36291
## 15           Iowa      56290
## 16         Kansas      82264
## 17       Kentucky      40395
## 18      Louisiana      48523
## 19          Maine      33215
## 20       Maryland      10577
## 21  Massachusetts       8257
## 22       Michigan      58216
## 23      Minnesota      84068
## 24    Mississippi      47716
## 25       Missouri      69686
## 26        Montana     147138
## 27       Nebraska      77227
## 28         Nevada     110540
## 29  New Hampshire       9304
## 30     New Jersey       7836
## 31     New Mexico     121666
## 32       New York      49576
## 33 North Carolina      52586
## 34   North Dakota      70665
## 35           Ohio      41222
## 36       Oklahoma      69919
## 37         Oregon      96981
## 38   Pennsylvania      45333
## 39   Rhode Island       1214
## 40 South Carolina      31055
## 41   South Dakota      77047
## 42      Tennessee      42244
## 43          Texas     267339
## 44           Utah      84916
## 45        Vermont       9609
## 46       Virginia      40815
## 47     Washington      68192
## 48  West Virginia      24181
## 49      Wisconsin      56154
## 50        Wyoming      97914
state.x77
##                Population Income Illiteracy Life Exp Murder HS Grad Frost
## Alabama              3615   3624        2.1    69.05   15.1    41.3    20
## Alaska                365   6315        1.5    69.31   11.3    66.7   152
## Arizona              2212   4530        1.8    70.55    7.8    58.1    15
## Arkansas             2110   3378        1.9    70.66   10.1    39.9    65
## California          21198   5114        1.1    71.71   10.3    62.6    20
## Colorado             2541   4884        0.7    72.06    6.8    63.9   166
## Connecticut          3100   5348        1.1    72.48    3.1    56.0   139
## Delaware              579   4809        0.9    70.06    6.2    54.6   103
## Florida              8277   4815        1.3    70.66   10.7    52.6    11
## Georgia              4931   4091        2.0    68.54   13.9    40.6    60
## Hawaii                868   4963        1.9    73.60    6.2    61.9     0
## Idaho                 813   4119        0.6    71.87    5.3    59.5   126
## Illinois            11197   5107        0.9    70.14   10.3    52.6   127
## Indiana              5313   4458        0.7    70.88    7.1    52.9   122
## Iowa                 2861   4628        0.5    72.56    2.3    59.0   140
## Kansas               2280   4669        0.6    72.58    4.5    59.9   114
## Kentucky             3387   3712        1.6    70.10   10.6    38.5    95
## Louisiana            3806   3545        2.8    68.76   13.2    42.2    12
## Maine                1058   3694        0.7    70.39    2.7    54.7   161
## Maryland             4122   5299        0.9    70.22    8.5    52.3   101
## Massachusetts        5814   4755        1.1    71.83    3.3    58.5   103
## Michigan             9111   4751        0.9    70.63   11.1    52.8   125
## Minnesota            3921   4675        0.6    72.96    2.3    57.6   160
## Mississippi          2341   3098        2.4    68.09   12.5    41.0    50
## Missouri             4767   4254        0.8    70.69    9.3    48.8   108
## Montana               746   4347        0.6    70.56    5.0    59.2   155
## Nebraska             1544   4508        0.6    72.60    2.9    59.3   139
## Nevada                590   5149        0.5    69.03   11.5    65.2   188
## New Hampshire         812   4281        0.7    71.23    3.3    57.6   174
## New Jersey           7333   5237        1.1    70.93    5.2    52.5   115
## New Mexico           1144   3601        2.2    70.32    9.7    55.2   120
## New York            18076   4903        1.4    70.55   10.9    52.7    82
## North Carolina       5441   3875        1.8    69.21   11.1    38.5    80
## North Dakota          637   5087        0.8    72.78    1.4    50.3   186
## Ohio                10735   4561        0.8    70.82    7.4    53.2   124
## Oklahoma             2715   3983        1.1    71.42    6.4    51.6    82
## Oregon               2284   4660        0.6    72.13    4.2    60.0    44
## Pennsylvania        11860   4449        1.0    70.43    6.1    50.2   126
## Rhode Island          931   4558        1.3    71.90    2.4    46.4   127
## South Carolina       2816   3635        2.3    67.96   11.6    37.8    65
## South Dakota          681   4167        0.5    72.08    1.7    53.3   172
## Tennessee            4173   3821        1.7    70.11   11.0    41.8    70
## Texas               12237   4188        2.2    70.90   12.2    47.4    35
## Utah                 1203   4022        0.6    72.90    4.5    67.3   137
## Vermont               472   3907        0.6    71.64    5.5    57.1   168
## Virginia             4981   4701        1.4    70.08    9.5    47.8    85
## Washington           3559   4864        0.6    71.72    4.3    63.5    32
## West Virginia        1799   3617        1.4    69.48    6.7    41.6   100
## Wisconsin            4589   4468        0.7    72.48    3.0    54.5   149
## Wyoming               376   4566        0.6    70.29    6.9    62.9   173
##                  Area
## Alabama         50708
## Alaska         566432
## Arizona        113417
## Arkansas        51945
## California     156361
## Colorado       103766
## Connecticut      4862
## Delaware         1982
## Florida         54090
## Georgia         58073
## Hawaii           6425
## Idaho           82677
## Illinois        55748
## Indiana         36097
## Iowa            55941
## Kansas          81787
## Kentucky        39650
## Louisiana       44930
## Maine           30920
## Maryland         9891
## Massachusetts    7826
## Michigan        56817
## Minnesota       79289
## Mississippi     47296
## Missouri        68995
## Montana        145587
## Nebraska        76483
## Nevada         109889
## New Hampshire    9027
## New Jersey       7521
## New Mexico     121412
## New York        47831
## North Carolina  48798
## North Dakota    69273
## Ohio            40975
## Oklahoma        68782
## Oregon          96184
## Pennsylvania    44966
## Rhode Island     1049
## South Carolina  30225
## South Dakota    75955
## Tennessee       41328
## Texas          262134
## Utah            82096
## Vermont          9267
## Virginia        39780
## Washington      66570
## West Virginia   24070
## Wisconsin       54464
## Wyoming         97203
str(state.x77)
##  num [1:50, 1:8] 3615 365 2212 2110 21198 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : chr [1:50] "Alabama" "Alaska" "Arizona" "Arkansas" ...
##   ..$ : chr [1:8] "Population" "Income" "Illiteracy" "Life Exp" ...
# Convert the state.x77 matrix to a data frame. Column names are made
# syntactically valid on the way ("Life Exp" -> Life.Exp, "HS Grad" -> HS.Grad).
states <- data.frame(state.x77)
str(states)
## 'data.frame':    50 obs. of  8 variables:
##  $ Population: num  3615 365 2212 2110 21198 ...
##  $ Income    : num  3624 6315 4530 3378 5114 ...
##  $ Illiteracy: num  2.1 1.5 1.8 1.9 1.1 0.7 1.1 0.9 1.3 2 ...
##  $ Life.Exp  : num  69 69.3 70.5 70.7 71.7 ...
##  $ Murder    : num  15.1 11.3 7.8 10.1 10.3 6.8 3.1 6.2 10.7 13.9 ...
##  $ HS.Grad   : num  41.3 66.7 58.1 39.9 62.6 63.9 56 54.6 52.6 40.6 ...
##  $ Frost     : num  20 152 15 65 20 166 139 103 11 60 ...
##  $ Area      : num  50708 566432 113417 51945 156361 ...
row.names(states)
##  [1] "Alabama"        "Alaska"         "Arizona"        "Arkansas"      
##  [5] "California"     "Colorado"       "Connecticut"    "Delaware"      
##  [9] "Florida"        "Georgia"        "Hawaii"         "Idaho"         
## [13] "Illinois"       "Indiana"        "Iowa"           "Kansas"        
## [17] "Kentucky"       "Louisiana"      "Maine"          "Maryland"      
## [21] "Massachusetts"  "Michigan"       "Minnesota"      "Mississippi"   
## [25] "Missouri"       "Montana"        "Nebraska"       "Nevada"        
## [29] "New Hampshire"  "New Jersey"     "New Mexico"     "New York"      
## [33] "North Carolina" "North Dakota"   "Ohio"           "Oklahoma"      
## [37] "Oregon"         "Pennsylvania"   "Rhode Island"   "South Carolina"
## [41] "South Dakota"   "Tennessee"      "Texas"          "Utah"          
## [45] "Vermont"        "Virginia"       "Washington"     "West Virginia" 
## [49] "Wisconsin"      "Wyoming"
# Move the state names out of the row names into a regular "Name" column,
# then reset the row names to the default 1..n sequence.
states[["Name"]] <- rownames(states)
rownames(states) <- NULL
head(states)
##   Population Income Illiteracy Life.Exp Murder HS.Grad Frost   Area       Name
## 1       3615   3624        2.1    69.05   15.1    41.3    20  50708    Alabama
## 2        365   6315        1.5    69.31   11.3    66.7   152 566432     Alaska
## 3       2212   4530        1.8    70.55    7.8    58.1    15 113417    Arizona
## 4       2110   3378        1.9    70.66   10.1    39.9    65  51945   Arkansas
## 5      21198   5114        1.1    71.71   10.3    62.6    20 156361 California
## 6       2541   4884        0.7    72.06    6.8    63.9   166 103766   Colorado
rich.states <- states[states$Income > 5000, c("Name", "Income")]
rich.states
##            Name Income
## 2        Alaska   6315
## 5    California   5114
## 7   Connecticut   5348
## 13     Illinois   5107
## 20     Maryland   5299
## 28       Nevada   5149
## 30   New Jersey   5237
## 34 North Dakota   5087
large.states <- states[states$Area > 100000, c("Name", "Area")]
large.states
##          Name   Area
## 2      Alaska 566432
## 3     Arizona 113417
## 5  California 156361
## 6    Colorado 103766
## 26    Montana 145587
## 28     Nevada 109889
## 31 New Mexico 121412
## 43      Texas 262134
# With no `by` given, merge() joins on the column the two tables share (Name)
# and keeps only the states that appear in both.
merge(rich.states, large.states)
##         Name Income   Area
## 1     Alaska   6315 566432
## 2 California   5114 156361
## 3     Nevada   5149 109889
# all = TRUE also keeps states found in only one table; the missing Income or
# Area value is filled with NA.
merge(rich.states, large.states, all = TRUE)
##            Name Income   Area
## 1        Alaska   6315 566432
## 2       Arizona     NA 113417
## 3    California   5114 156361
## 4      Colorado     NA 103766
## 5   Connecticut   5348     NA
## 6      Illinois   5107     NA
## 7      Maryland   5299     NA
## 8       Montana     NA 145587
## 9        Nevada   5149 109889
## 10   New Jersey   5237     NA
## 11   New Mexico     NA 121412
## 12 North Dakota   5087     NA
## 13        Texas     NA 262134

with(), within()

head(iris)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa
r <- iris$Sepal.Length / iris$Sepal.Width
head(r)
## [1] 1.457143 1.633333 1.468750 1.483871 1.388889 1.384615
with(iris, Sepal.Length / Sepal.Width)
##   [1] 1.457143 1.633333 1.468750 1.483871 1.388889 1.384615 1.352941 1.470588
##   [9] 1.517241 1.580645 1.459459 1.411765 1.600000 1.433333 1.450000 1.295455
##  [17] 1.384615 1.457143 1.500000 1.342105 1.588235 1.378378 1.277778 1.545455
##  [25] 1.411765 1.666667 1.470588 1.485714 1.529412 1.468750 1.548387 1.588235
##  [33] 1.268293 1.309524 1.580645 1.562500 1.571429 1.361111 1.466667 1.500000
##  [41] 1.428571 1.956522 1.375000 1.428571 1.342105 1.600000 1.342105 1.437500
##  [49] 1.432432 1.515152 2.187500 2.000000 2.225806 2.391304 2.321429 2.035714
##  [57] 1.909091 2.041667 2.275862 1.925926 2.500000 1.966667 2.727273 2.103448
##  [65] 1.931034 2.161290 1.866667 2.148148 2.818182 2.240000 1.843750 2.178571
##  [73] 2.520000 2.178571 2.206897 2.200000 2.428571 2.233333 2.068966 2.192308
##  [81] 2.291667 2.291667 2.148148 2.222222 1.800000 1.764706 2.161290 2.739130
##  [89] 1.866667 2.200000 2.115385 2.033333 2.230769 2.173913 2.074074 1.900000
##  [97] 1.965517 2.137931 2.040000 2.035714 1.909091 2.148148 2.366667 2.172414
## [105] 2.166667 2.533333 1.960000 2.517241 2.680000 2.000000 2.031250 2.370370
## [113] 2.266667 2.280000 2.071429 2.000000 2.166667 2.026316 2.961538 2.727273
## [121] 2.156250 2.000000 2.750000 2.333333 2.030303 2.250000 2.214286 2.033333
## [129] 2.285714 2.400000 2.642857 2.078947 2.285714 2.250000 2.346154 2.566667
## [137] 1.852941 2.064516 2.000000 2.225806 2.161290 2.225806 2.148148 2.125000
## [145] 2.030303 2.233333 2.520000 2.166667 1.823529 1.966667
head(r)
## [1] 1.457143 1.633333 1.468750 1.483871 1.388889 1.384615
# A braced block lets several expressions use the iris columns by bare name.
# summary() is wrapped in print() so its result is shown even though it is
# not the last expression of the block.
with(iris, {
  print(summary(Sepal.Length))
  plot(Sepal.Length, Sepal.Width)
  plot(Petal.Length, Petal.Width)
})
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   4.300   5.100   5.800   5.843   6.400   7.900

# Variables assigned with `<-` inside with() exist only while the block runs;
# the value of the whole call is the last expression (stats).
with(iris, {
  stats <- summary(Sepal.Length)
  stats
})
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   4.300   5.100   5.800   5.843   6.400   7.900
# stats  # left commented out: stats was local to the with() block and is not defined here

# `<-` inside with() creates a local variable that is gone once the block
# ends, whereas `<<-` assigns outside the with() environment, so stats.keep
# is still available afterwards.
with(iris, {
  stats.nokeep <- summary(Sepal.Length)
  stats.keep <<- summary(Sepal.Length)
})
# stats.nokeep  # left commented out: stats.nokeep was local to with() and is not defined here
stats.keep
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   4.300   5.100   5.800   5.843   6.400   7.900
iris$Sepal.Ratio <- iris$Sepal.Length / iris$Sepal.Width
head(iris)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species Sepal.Ratio
## 1          5.1         3.5          1.4         0.2  setosa    1.457143
## 2          4.9         3.0          1.4         0.2  setosa    1.633333
## 3          4.7         3.2          1.3         0.2  setosa    1.468750
## 4          4.6         3.1          1.5         0.2  setosa    1.483871
## 5          5.0         3.6          1.4         0.2  setosa    1.388889
## 6          5.4         3.9          1.7         0.4  setosa    1.384615
# within() evaluates the block against iris and returns a modified copy, so
# the result must be assigned back to keep the Sepal.Ratio column.
iris <- within(iris, {
  Sepal.Ratio <- Sepal.Length / Sepal.Width
})
head(iris)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species Sepal.Ratio
## 1          5.1         3.5          1.4         0.2  setosa    1.457143
## 2          4.9         3.0          1.4         0.2  setosa    1.633333
## 3          4.7         3.2          1.3         0.2  setosa    1.468750
## 4          4.6         3.1          1.5         0.2  setosa    1.483871
## 5          5.0         3.6          1.4         0.2  setosa    1.388889
## 6          5.4         3.9          1.7         0.4  setosa    1.384615

attach(), detach()

attach(iris)
search()
##  [1] ".GlobalEnv"        "iris"              "package:stats"    
##  [4] "package:graphics"  "package:grDevices" "package:utils"    
##  [7] "package:datasets"  "package:methods"   "Autoloads"        
## [10] "package:base"
r <- Sepal.Length / Sepal.Width
head(r)
## [1] 1.457143 1.633333 1.468750 1.483871 1.388889 1.384615
detach(iris)
search()
## [1] ".GlobalEnv"        "package:stats"     "package:graphics" 
## [4] "package:grDevices" "package:utils"     "package:datasets" 
## [7] "package:methods"   "Autoloads"         "package:base"
# Pitfall: the columns made available by attach() are not kept in sync with
# the data frame. Changing iris after attaching it does not change what the
# bare column names resolve to.
attach(iris)
iris$Sepal.Length <- 0
head(iris$Sepal.Length)
## [1] 0 0 0 0 0 0
# The bare name still returns the values from the time of attach().
head(Sepal.Length)
## [1] 5.1 4.9 4.7 4.6 5.0 5.4
detach(iris)

# Pitfall: assigning to a bare column name while a data frame is attached
# creates a new variable in the workspace; the column inside iris is not
# modified.
attach(iris)
Sepal.Width <- Sepal.Width * 10
head(Sepal.Width)
## [1] 35 30 32 31 36 39
ls()
##  [1] "a"                 "ary"               "b"                
##  [4] "c"                 "cha"               "city.distance"    
##  [7] "city.distance.mat" "colnames"          "cols1"            
## [10] "cols2"             "df1"               "df2"              
## [13] "df3"               "df4"               "df5"              
## [16] "eval"              "eval.factor"       "eval.ordered"     
## [19] "even"              "eventday"          "eventday.factor"  
## [22] "food"              "fruit"             "indices"          
## [25] "iris"              "k"                 "large.states"     
## [28] "lst"               "lst1"              "lst2"             
## [31] "lst3"              "lst4"              "lst5"             
## [34] "mat"               "month"             "mtx"              
## [37] "names"             "new.cols"          "new.rows"         
## [40] "num"               "number"            "odd"              
## [43] "p"                 "prime"             "product"          
## [46] "q"                 "r"                 "rainfall"         
## [49] "review"            "review.factor"     "rich.states"      
## [52] "rnames"            "Sepal.Width"       "sex"              
## [55] "sex.factor"        "states"            "stats.keep"       
## [58] "traffic.death"     "us.state"          "v"                
## [61] "v1"                "v2"                "v3"               
## [64] "values"            "w"                 "weekend"          
## [67] "worldcup1"         "worldcup2"         "y"                
## [70] "z"
head(iris$Sepal.Width)
## [1] 3.5 3.0 3.2 3.1 3.6 3.9
detach(iris)
rm(Sepal.Width)

# Pitfall: a workspace variable with the same name as a column wins over the
# attached data frame, because .GlobalEnv is searched before attached objects.
Sepal.Length <- c(4.5, 5.3, 6.7)
Sepal.Length
## [1] 4.5 5.3 6.7
attach(iris)
## The following object is masked _by_ .GlobalEnv:
## 
##     Sepal.Length
# Left commented out: it would pair the 3-element workspace vector with the
# full-length attached Sepal.Width column.
# plot(Sepal.Length, Sepal.Width)
Sepal.Length
## [1] 4.5 5.3 6.7
# Detach again so iris does not linger on the search path for the rest of
# the session.
detach(iris)

subset(), cor()

head(mtcars)
##                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1
subset(mtcars,
       subset = (mpg > 30),
       select = mpg)
##                 mpg
## Fiat 128       32.4
## Honda Civic    30.4
## Toyota Corolla 33.9
## Lotus Europa   30.4
# Rows with four cylinders and am == 0, keeping only mpg, hp and wt.
# cyl is a plain number in mtcars, so it is compared with 4; comparing it with
# a text label such as "4 cylinders" never matches and yields zero rows.
subset(mtcars,
       subset = (cyl == 4 & am == 0),
       select = c(mpg, hp, wt))
##                mpg hp    wt
## Merc 240D     24.4 62 3.190
## Merc 230      22.8 95 3.150
## Toyota Corona 21.5 97 2.465
subset(mtcars,
       subset = (mpg > mean(mpg)),
       select = c(mpg, cyl, wt))
##                 mpg cyl    wt
## Mazda RX4      21.0   6 2.620
## Mazda RX4 Wag  21.0   6 2.875
## Datsun 710     22.8   4 2.320
## Hornet 4 Drive 21.4   6 3.215
## Merc 240D      24.4   4 3.190
## Merc 230       22.8   4 3.150
## Fiat 128       32.4   4 2.200
## Honda Civic    30.4   4 1.615
## Toyota Corolla 33.9   4 1.835
## Toyota Corona  21.5   4 2.465
## Fiat X1-9      27.3   4 1.935
## Porsche 914-2  26.0   4 2.140
## Lotus Europa   30.4   4 1.513
## Volvo 142E     21.4   4 2.780
head(USArrests)
##            Murder Assault UrbanPop Rape
## Alabama      13.2     236       58 21.2
## Alaska       10.0     263       48 44.5
## Arizona       8.1     294       80 31.0
## Arkansas      8.8     190       50 19.5
## California    9.0     276       91 40.6
## Colorado      7.9     204       78 38.7
cor(USArrests)
##              Murder   Assault   UrbanPop      Rape
## Murder   1.00000000 0.8018733 0.06957262 0.5635788
## Assault  0.80187331 1.0000000 0.25887170 0.6652412
## UrbanPop 0.06957262 0.2588717 1.00000000 0.4113412
## Rape     0.56357883 0.6652412 0.41134124 1.0000000
subset(USArrests, select = -UrbanPop)
##                Murder Assault Rape
## Alabama          13.2     236 21.2
## Alaska           10.0     263 44.5
## Arizona           8.1     294 31.0
## Arkansas          8.8     190 19.5
## California        9.0     276 40.6
## Colorado          7.9     204 38.7
## Connecticut       3.3     110 11.1
## Delaware          5.9     238 15.8
## Florida          15.4     335 31.9
## Georgia          17.4     211 25.8
## Hawaii            5.3      46 20.2
## Idaho             2.6     120 14.2
## Illinois         10.4     249 24.0
## Indiana           7.2     113 21.0
## Iowa              2.2      56 11.3
## Kansas            6.0     115 18.0
## Kentucky          9.7     109 16.3
## Louisiana        15.4     249 22.2
## Maine             2.1      83  7.8
## Maryland         11.3     300 27.8
## Massachusetts     4.4     149 16.3
## Michigan         12.1     255 35.1
## Minnesota         2.7      72 14.9
## Mississippi      16.1     259 17.1
## Missouri          9.0     178 28.2
## Montana           6.0     109 16.4
## Nebraska          4.3     102 16.5
## Nevada           12.2     252 46.0
## New Hampshire     2.1      57  9.5
## New Jersey        7.4     159 18.8
## New Mexico       11.4     285 32.1
## New York         11.1     254 26.1
## North Carolina   13.0     337 16.1
## North Dakota      0.8      45  7.3
## Ohio              7.3     120 21.4
## Oklahoma          6.6     151 20.0
## Oregon            4.9     159 29.3
## Pennsylvania      6.3     106 14.9
## Rhode Island      3.4     174  8.3
## South Carolina   14.4     279 22.5
## South Dakota      3.8      86 12.8
## Tennessee        13.2     188 26.9
## Texas            12.7     201 25.5
## Utah              3.2     120 22.9
## Vermont           2.2      48 11.2
## Virginia          8.5     156 20.7
## Washington        4.0     145 26.2
## West Virginia     5.7      81  9.3
## Wisconsin         2.6      53 10.8
## Wyoming           6.8     161 15.6
cor(subset(USArrests, select = -UrbanPop))
##            Murder   Assault      Rape
## Murder  1.0000000 0.8018733 0.5635788
## Assault 0.8018733 1.0000000 0.6652412
## Rape    0.5635788 0.6652412 1.0000000
cor(subset(USArrests, select = -c(UrbanPop, Rape)))
##            Murder   Assault
## Murder  1.0000000 0.8018733
## Assault 0.8018733 1.0000000

sqldf()

library(sqldf)
## Loading required package: gsubfn
## Loading required package: proto
## Loading required package: RSQLite
data("mtcars")
sqldf("select * from mtcars where mpg > 30", row.names = TRUE)
##                 mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Fiat 128       32.4   4 78.7  66 4.08 2.200 19.47  1  1    4    1
## Honda Civic    30.4   4 75.7  52 4.93 1.615 18.52  1  1    4    2
## Toyota Corolla 33.9   4 71.1  65 4.22 1.835 19.90  1  1    4    1
## Lotus Europa   30.4   4 95.1 113 3.77 1.513 16.90  1  1    5    2
# Six-cylinder cars ordered by mpg. cyl is numeric in mtcars, so it is compared
# with the number 6; the text label '6 cylinders' never matches any row.
sqldf("select * from mtcars where cyl = 6 order by mpg",
      row.names = TRUE)
##                 mpg cyl  disp  hp drat    wt  qsec vs am gear carb
## Merc 280C      17.8   6 167.6 123 3.92 3.440 18.90  1  0    4    4
## Valiant        18.1   6 225.0 105 2.76 3.460 20.22  1  0    3    1
## Merc 280       19.2   6 167.6 123 3.92 3.440 18.30  1  0    4    4
## Ferrari Dino   19.7   6 145.0 175 3.62 2.770 15.50  0  1    5    6
## Mazda RX4      21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag  21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4
## Hornet 4 Drive 21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1
sqldf("select avg(mpg) as avg_mpg, avg(wt) as avg_wt, gear from mtcars where carb in (4, 6) group by gear")
##   avg_mpg  avg_wt gear
## 1   12.62 4.68580    3
## 2   19.75 3.09375    4
## 3   17.75 2.97000    5
data(iris)
sqldf("select distinct Species from iris")
##      Species
## 1     setosa
## 2 versicolor
## 3  virginica
sqldf("select * from iris limit 3")
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
sqldf("select avg([Sepal.Length]) from iris where Species = 'setosa'")
##   avg([Sepal.Length])
## 1               5.006
sqldf('select avg("Sepal.Length") from iris where Species = "setosa"
      ')
##   avg("Sepal.Length")
## 1               5.006

텍스트

nchar(), strsplit(), paste(), outer()

x <- "We have a dream"
nchar(x)
## [1] 15
length(x)
## [1] 1
y <- c("we", "have", "a", "dream")
nchar(y)
## [1] 2 4 1 5
length(y)
## [1] 4
nchar(y[4])
## [1] 5
letters
##  [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o" "p" "q" "r" "s"
## [20] "t" "u" "v" "w" "x" "y" "z"
sort(letters, decreasing = TRUE)
##  [1] "z" "y" "x" "w" "v" "u" "t" "s" "r" "q" "p" "o" "n" "m" "l" "k" "j" "i" "h"
## [20] "g" "f" "e" "d" "c" "b" "a"
fox.says <- "It is only with the HEART"
tolower(fox.says)
## [1] "it is only with the heart"
toupper(fox.says)
## [1] "IT IS ONLY WITH THE HEART"
strsplit(fox.says, split = " ")
## [[1]]
## [1] "It"    "is"    "only"  "with"  "the"   "HEART"
strsplit(fox.says, split = "")
## [[1]]
##  [1] "I" "t" " " "i" "s" " " "o" "n" "l" "y" " " "w" "i" "t" "h" " " "t" "h" "e"
## [20] " " "H" "E" "A" "R" "T"
unlist(strsplit(fox.says, split = " "))
## [1] "It"    "is"    "only"  "with"  "the"   "HEART"
strsplit(fox.says, split = " ")[[1]][[3]]
## [1] "only"
unlist(strsplit(fox.says, split = " "))[[3]]
## [1] "only"
littleprince <- c(x, fox.says)
strsplit(littleprince, " ")
## [[1]]
## [1] "We"    "have"  "a"     "dream"
## 
## [[2]]
## [1] "It"    "is"    "only"  "with"  "the"   "HEART"
strsplit(littleprince, " ")
## [[1]]
## [1] "We"    "have"  "a"     "dream"
## 
## [[2]]
## [1] "It"    "is"    "only"  "with"  "the"   "HEART"
strsplit(littleprince, " ")[[2]]
## [1] "It"    "is"    "only"  "with"  "the"   "HEART"
strsplit(littleprince, " ")[[2]][[5]]
## [1] "the"
fox.says <- "It is only with the HEART it"
fox.says.word <- strsplit(fox.says, " ")[[1]]
unique(fox.says.word)
## [1] "It"    "is"    "only"  "with"  "the"   "HEART" "it"
unique(tolower(fox.says.word))
## [1] "it"    "is"    "only"  "with"  "the"   "heart"
paste("Everybody", "wants", "to", "fly")
## [1] "Everybody wants to fly"
paste(c("Everybody", "wants", "to", "fly"))
## [1] "Everybody" "wants"     "to"        "fly"
paste("Everybody", "wants", "to", "fly", sep="-")
## [1] "Everybody-wants-to-fly"
paste("Everybody", "wants", "to", "fly", sep="")
## [1] "Everybodywantstofly"
paste0("Everybody", "wants", "to", "fly")
## [1] "Everybodywantstofly"
paste(pi, sqrt(pi))
## [1] "3.14159265358979 1.77245385090552"
paste("25 dgrees celsius is", 25*1.8+32, "degree Fahrenheit")
## [1] "25 dgrees celsius is 77 degree Fahrenheit"
heroes <- c("Batman", "Captain America", "Hulk")
colors <- c("Black", "Blue", "Green")
paste(heroes, colors)
## [1] "Batman Black"         "Captain America Blue" "Hulk Green"
paste("Type", 1:5)
## [1] "Type 1" "Type 2" "Type 3" "Type 4" "Type 5"
paste(heroes, "wants", "to", "fly")
## [1] "Batman wants to fly"          "Captain America wants to fly"
## [3] "Hulk wants to fly"
paste(c("Everybody", "wants", "to", "fly"))
## [1] "Everybody" "wants"     "to"        "fly"
paste(c("Everybody", "wants", "to", "fly"), collapse = " ")
## [1] "Everybody wants to fly"
paste(heroes, "wants", "to", "fly", collapse = ", and ")
## [1] "Batman wants to fly, and Captain America wants to fly, and Hulk wants to fly"
paste(month.abb, 1:12)
##  [1] "Jan 1"  "Feb 2"  "Mar 3"  "Apr 4"  "May 5"  "Jun 6"  "Jul 7"  "Aug 8" 
##  [9] "Sep 9"  "Oct 10" "Nov 11" "Dec 12"
paste(month.abb, 1:12, sep="_")
##  [1] "Jan_1"  "Feb_2"  "Mar_3"  "Apr_4"  "May_5"  "Jun_6"  "Jul_7"  "Aug_8" 
##  [9] "Sep_9"  "Oct_10" "Nov_11" "Dec_12"
paste(month.abb, 1:12, sep="_", collapse = "-")
## [1] "Jan_1-Feb_2-Mar_3-Apr_4-May_5-Jun_6-Jul_7-Aug_8-Sep_9-Oct_10-Nov_11-Dec_12"
outer(c(1, 2, 3), c(1, 2, 3))
##      [,1] [,2] [,3]
## [1,]    1    2    3
## [2,]    2    4    6
## [3,]    3    6    9
asian.countries <- c("Korea", "Japan","China")
info <- c("GDP", "Population", "Area")
outer(asian.countries, info, FUN = paste, sep = "-")
##      [,1]        [,2]               [,3]        
## [1,] "Korea-GDP" "Korea-Population" "Korea-Area"
## [2,] "Japan-GDP" "Japan-Population" "Japan-Area"
## [3,] "China-GDP" "China-Population" "China-Area"
x <- outer(asian.countries, asian.countries, FUN = paste, sep = "-")
x[!lower.tri(x)]
## [1] "Korea-Korea" "Korea-Japan" "Japan-Japan" "Korea-China" "Japan-China"
## [6] "China-China"

sprintf()

# Build the same delivery notice with paste0() and with sprintf().
customer <- "Jobs"
buysize <- 10
deliveryday <- 3
# paste0() joins its arguments without a separator, so every blank has to be
# written inside the literals (including the one before "day(s)").
paste0("Hello ", customer, ", your order of ", buysize,
       " product(s) will be delivered within ", deliveryday,
       " day(s)")
## [1] "Hello Jobs, your order of 10 product(s) will be delivered within 3 day(s)"
# The format string is assembled from two literals so that the line break in
# the source does not end up inside the message.
sprintf(paste0("Hello %s, your order of %s product(s) will be ",
               "delivered within %s day(s)"),
        customer, buysize, deliveryday)
## [1] "Hello Jobs, your order of 10 product(s) will be delivered within 3 day(s)"
# sprintf() is vectorised: with vector arguments it returns one message per
# element.
customer <- c("Jobs", "Gates", "Bezos")
buysize <- c(10, 7, 12)
deliveryday <- c(3, 2, 7.5)
sprintf(paste0("Hello %s, your order of %s product(s) will be ",
               "delivered within %s day(s)"),
        customer, buysize, deliveryday)
## [1] "Hello Jobs, your order of 10 product(s) will be delivered within 3 day(s)"   
## [2] "Hello Gates, your order of 7 product(s) will be delivered within 2 day(s)"   
## [3] "Hello Bezos, your order of 12 product(s) will be delivered within 7.5 day(s)"

substr(), substring(), grep(), sub()

substr("Data Analytics", start = 1, stop = 4)
## [1] "Data"
substr("Data Analytics", start = 6, stop = 14)
## [1] "Analytics"
substring("Data Analytics", 6)
## [1] "Analytics"
class <- c("Data Analytics", "Data Mining", "Data Visualization")
substr(class, 1, 4)
## [1] "Data" "Data" "Data"
# Extract the trailing two-letter country code from each entry.
# Data corrected: "United States" was misspelled, and China's code is CN
# (CH is Switzerland).
countries <- c("Korea, KR", "United States, US", "China, CN")
substr(countries, nchar(countries) - 1, nchar(countries))
## [1] "KR" "US" "CN"
head(islands)
##       Africa   Antarctica         Asia    Australia Axel Heiberg       Baffin 
##        11506         5500        16988         2968           16          184
landmasses <- names(islands)
index <- grep(pattern = "New", x = landmasses)
landmasses[index]
## [1] "New Britain"     "New Guinea"      "New Zealand (N)" "New Zealand (S)"
## [5] "Newfoundland"
grep(pattern = "New", x = landmasses, value = TRUE)
## [1] "New Britain"     "New Guinea"      "New Zealand (N)" "New Zealand (S)"
## [5] "Newfoundland"
landmasses[grep(pattern = " ", landmasses)]
##  [1] "Axel Heiberg"     "New Britain"      "New Guinea"       "New Zealand (N)" 
##  [5] "New Zealand (S)"  "North America"    "Novaya Zemlya"    "Prince of Wales" 
##  [9] "South America"    "Tierra del Fuego"
grep(" ", landmasses, value = TRUE)
##  [1] "Axel Heiberg"     "New Britain"      "New Guinea"       "New Zealand (N)" 
##  [5] "New Zealand (S)"  "North America"    "Novaya Zemlya"    "Prince of Wales" 
##  [9] "South America"    "Tierra del Fuego"
fox.says <- "It is only with the HEART that is"
sub(pattern = "is", replacement = "was", x = fox.says)
## [1] "It was only with the HEART that is"
gsub(pattern = "is", replacement = "was", x = fox.says)
## [1] "It was only with the HEART that was"
x <- c("product.csv", "customer.csv", "supplier.csv")
# Remove the ".csv" extension. The dot is escaped and the pattern is anchored
# at the end; an unescaped "." matches any character, so ".csv" would also hit
# text such as "_csv" in the middle of a file name.
sub(pattern = "\\.csv$", replacement = "", x = x)
## [1] "product"  "customer" "supplier"

정규표현식

[:digit:] : [0-9]
[:lower:] : [a-z]
[:upper:] : [A-Z]
[:alpha:] : [A-Za-z]
[:alnum:] : [A-Za-z0-9]
[:punct:] : 문장부호
[:blank:] : space, tab
[:space:] : space, tab, newline, vertical tab, form feed, carriage return
[:print:] : [[:alnum:][:punct:]]와 space (출력 가능한 문자)
[:graph:] : [[:alnum:][:punct:]] 그래프 문자(공백을 제외한 출력 가능한 문자)

? : 0~1회
* : 0회 이상
+ : 1회 이상
{n} : n회 반복
{n,} : n회 이상 반복
{n,m} : n회~m회 반복

\w : [[:alnum:]_] 단어 문자
\W : [^[:alnum:]_] 단어 문자를 제외한 문자
\d : [[:digit:]] 숫자
\D : [^[:digit:]] 숫자를 제외한 문자
\s : [[:space:]] 스페이스 문자
\S : [^[:space:]] 스페이스 문자를 제외한 문자
\b : 단어 경계의 빈 문자열
\B : 단어 경계가 아닌 위치의 빈 문자열
\< : 단어 시작
\> : 단어 끝
words <- c("at", "bat", "cat", "chaenomelss", "chase", "chasse",
           "cheap", "check", "cheese", "chick", "hat")
grep("che", words, value = TRUE)
## [1] "cheap"  "check"  "cheese"
grep("at", words, value = TRUE)
## [1] "at"  "bat" "cat" "hat"
grep("[ch]", words, value = TRUE)
## [1] "cat"         "chaenomelss" "chase"       "chasse"      "cheap"      
## [6] "check"       "cheese"      "chick"       "hat"
grep("[at]", words, value = TRUE)
## [1] "at"          "bat"         "cat"         "chaenomelss" "chase"      
## [6] "chasse"      "cheap"       "hat"
grep("ch|at", words, value = TRUE)
##  [1] "at"          "bat"         "cat"         "chaenomelss" "chase"      
##  [6] "chasse"      "cheap"       "check"       "cheese"      "chick"      
## [11] "hat"
grep("ch(e|i)ck", words, value = TRUE)
## [1] "check" "chick"
grep("chas?e", words, value = TRUE)
## [1] "chaenomelss" "chase"
grep("chas*e", words, value = TRUE)
## [1] "chaenomelss" "chase"       "chasse"
grep("chas+e", words, value = TRUE)
## [1] "chase"  "chasse"
grep("ch(a*|e*)s+e", words, value = TRUE)
## [1] "chase"  "chasse" "cheese"
grep("^c", words, value = TRUE)
## [1] "cat"         "chaenomelss" "chase"       "chasse"      "cheap"      
## [6] "check"       "cheese"      "chick"
grep("t$", words, value = TRUE)
## [1] "at"  "bat" "cat" "hat"
grep("^c.t$", words, value = TRUE)
## [1] "cat"
grep("^[hc]?at", words, value = TRUE)
## [1] "at"  "cat" "hat"
words2 <- c("12 Dec", "OK", "http://", "<TITLE>Time?</TITLE>",
            "12345", "Hi there")
grep("[[:alnum:]]", words2, value = TRUE)
## [1] "12 Dec"               "OK"                   "http://"             
## [4] "<TITLE>Time?</TITLE>" "12345"                "Hi there"
grep("[[:alpha:]]", words2, value = TRUE)
## [1] "12 Dec"               "OK"                   "http://"             
## [4] "<TITLE>Time?</TITLE>" "Hi there"
grep("[[:digit:]]", words2, value = TRUE)
## [1] "12 Dec" "12345"
grep("[[:punct:]]", words2, value = TRUE)
## [1] "http://"              "<TITLE>Time?</TITLE>"
grep("[[:space:]]", words2, value = TRUE)
## [1] "12 Dec"   "Hi there"
grep("\\w+", words2, value = TRUE)
## [1] "12 Dec"               "OK"                   "http://"             
## [4] "<TITLE>Time?</TITLE>" "12345"                "Hi there"
grep("\\d+", words2, value = TRUE)
## [1] "12 Dec" "12345"
grep("\\s+", words2, value = TRUE)
## [1] "12 Dec"   "Hi there"

library(base)

grep(), grepl()

string <- c("data analytics in useful",
            "business analytics is helpful",
            "visualization of data is interesting for data scientists")

grep(pattern = "data", x = string)
## [1] 1 3
grep(pattern = "data", x = string, value = TRUE)
## [1] "data analytics in useful"                                
## [2] "visualization of data is interesting for data scientists"
string[grep(pattern = "data", x = string)]
## [1] "data analytics in useful"                                
## [2] "visualization of data is interesting for data scientists"
grep("useful|helpful", string, value = TRUE)
## [1] "data analytics in useful"      "business analytics is helpful"
grep("useful|helpful", string, value = TRUE, invert = TRUE)
## [1] "visualization of data is interesting for data scientists"
grepl(pattern = "data", x = string)
## [1]  TRUE FALSE  TRUE
state.name
##  [1] "Alabama"        "Alaska"         "Arizona"        "Arkansas"      
##  [5] "California"     "Colorado"       "Connecticut"    "Delaware"      
##  [9] "Florida"        "Georgia"        "Hawaii"         "Idaho"         
## [13] "Illinois"       "Indiana"        "Iowa"           "Kansas"        
## [17] "Kentucky"       "Louisiana"      "Maine"          "Maryland"      
## [21] "Massachusetts"  "Michigan"       "Minnesota"      "Mississippi"   
## [25] "Missouri"       "Montana"        "Nebraska"       "Nevada"        
## [29] "New Hampshire"  "New Jersey"     "New Mexico"     "New York"      
## [33] "North Carolina" "North Dakota"   "Ohio"           "Oklahoma"      
## [37] "Oregon"         "Pennsylvania"   "Rhode Island"   "South Carolina"
## [41] "South Dakota"   "Tennessee"      "Texas"          "Utah"          
## [45] "Vermont"        "Virginia"       "Washington"     "West Virginia" 
## [49] "Wisconsin"      "Wyoming"
grepl("new", state.name, ignore.case = TRUE)
##  [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [25] FALSE FALSE FALSE FALSE  TRUE  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE
## [37] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [49] FALSE FALSE
state.name[grepl("new", state.name, ignore.case = TRUE)]
## [1] "New Hampshire" "New Jersey"    "New Mexico"    "New York"
sum(grepl("new", state.name, ignore.case = TRUE))
## [1] 4

regexpr(), gregexpr(), regmatches()

regexpr(pattern = "data", text = string)
## [1]  1 -1 18
## attr(,"match.length")
## [1]  4 -1  4
## attr(,"index.type")
## [1] "chars"
## attr(,"useBytes")
## [1] TRUE
gregexpr(pattern = "data", text = string)
## [[1]]
## [1] 1
## attr(,"match.length")
## [1] 4
## attr(,"index.type")
## [1] "chars"
## attr(,"useBytes")
## [1] TRUE
## 
## [[2]]
## [1] -1
## attr(,"match.length")
## [1] -1
## attr(,"index.type")
## [1] "chars"
## attr(,"useBytes")
## [1] TRUE
## 
## [[3]]
## [1] 18 42
## attr(,"match.length")
## [1] 4 4
## attr(,"index.type")
## [1] "chars"
## attr(,"useBytes")
## [1] TRUE
regmatches(x = string, m = regexpr(pattern = "data", text = string))
## [1] "data" "data"
regmatches(x = string, m = gregexpr(pattern = "data", text = string))
## [[1]]
## [1] "data"
## 
## [[2]]
## character(0)
## 
## [[3]]
## [1] "data" "data"
regmatches(x = string,
           m = gregexpr(pattern = "data", text = string),
           invert = TRUE)
## [[1]]
## [1] ""                     " analytics in useful"
## 
## [[2]]
## [1] "business analytics is helpful"
## 
## [[3]]
## [1] "visualization of "    " is interesting for " " scientists"

sub(), gsub(), strsplit()

sub(pattern = "data", replacement = "text", x = string)
## [1] "text analytics in useful"                                
## [2] "business analytics is helpful"                           
## [3] "visualization of text is interesting for data scientists"
gsub(pattern = "data", replacement = "text", x = string)
## [1] "text analytics in useful"                                
## [2] "business analytics is helpful"                           
## [3] "visualization of text is interesting for text scientists"
strsplit(x = string, split = " ")
## [[1]]
## [1] "data"      "analytics" "in"        "useful"   
## 
## [[2]]
## [1] "business"  "analytics" "is"        "helpful"  
## 
## [[3]]
## [1] "visualization" "of"            "data"          "is"           
## [5] "interesting"   "for"           "data"          "scientists"
unlist(strsplit(x = string, split = " "))
##  [1] "data"          "analytics"     "in"            "useful"       
##  [5] "business"      "analytics"     "is"            "helpful"      
##  [9] "visualization" "of"            "data"          "is"           
## [13] "interesting"   "for"           "data"          "scientists"
unique(unlist(strsplit(x = string, split = " ")))
##  [1] "data"          "analytics"     "in"            "useful"       
##  [5] "business"      "is"            "helpful"       "visualization"
##  [9] "of"            "interesting"   "for"           "scientists"

library(stringr)

str_detect()

string <- c("data analytics in useful",
            "business analytics is helpful",
            "visualization of data is interesting for data scientists")
library(stringr)
## 
## Attaching package: 'stringr'
## The following objects are masked _by_ '.GlobalEnv':
## 
##     fruit, words
str_detect(string = string, pattern = "data")
## [1]  TRUE FALSE  TRUE
str_detect(string = string, pattern = "DATA")
## [1] FALSE FALSE FALSE
str_detect(string = string, fixed(pattern = "DATA", ignore_case = TRUE))
## [1]  TRUE FALSE  TRUE
str_detect(c("abz", "ayz", "a.z"), "a.z")
## [1] TRUE TRUE TRUE
str_detect(c("abz", "ayz", "a.z"), fixed("a.z"))
## [1] FALSE FALSE  TRUE
str_detect(c("abz", "ayz", "a.z"), "a\\.z")
## [1] FALSE FALSE  TRUE

str_locate(), str_locate_all(), str_extract(), str_extract_all()

str_locate(string, "data")
##      start end
## [1,]     1   4
## [2,]    NA  NA
## [3,]    18  21
str_locate_all(string, "data")
## [[1]]
##      start end
## [1,]     1   4
## 
## [[2]]
##      start end
## 
## [[3]]
##      start end
## [1,]    18  21
## [2,]    42  45
str_extract(string, "data")
## [1] "data" NA     "data"
str_extract_all(string, "data")
## [[1]]
## [1] "data"
## 
## [[2]]
## character(0)
## 
## [[3]]
## [1] "data" "data"
str_extract_all(string, "data", simplify = TRUE)
##      [,1]   [,2]  
## [1,] "data" ""    
## [2,] ""     ""    
## [3,] "data" "data"
unlist(str_extract_all(string, "data"))
## [1] "data" "data" "data"

str_match(), str_match_all(), str_replace(), str_replace_all()

sentences5 <- sentences[1:5]
sentences5
## [1] "The birch canoe slid on the smooth planks." 
## [2] "Glue the sheet to the dark blue background."
## [3] "It's easy to tell the depth of a well."     
## [4] "These days a chicken leg is a rare dish."   
## [5] "Rice is often served in round bowls."
str_extract(sentences5, "(a|A|th|the) (\\w+)")
## [1] "the smooth" "the sheet"  "the depth"  "a chicken"  NA
str_match(sentences5, "(a|A|th|the) (\\w+)")
##      [,1]         [,2]  [,3]     
## [1,] "the smooth" "the" "smooth" 
## [2,] "the sheet"  "the" "sheet"  
## [3,] "the depth"  "the" "depth"  
## [4,] "a chicken"  "a"   "chicken"
## [5,] NA           NA    NA
str_match_all(sentences5, "(a|A|th|the) (\\w+)")
## [[1]]
##      [,1]         [,2]  [,3]    
## [1,] "the smooth" "the" "smooth"
## 
## [[2]]
##      [,1]        [,2]  [,3]   
## [1,] "the sheet" "the" "sheet"
## [2,] "the dark"  "the" "dark" 
## 
## [[3]]
##      [,1]        [,2]  [,3]   
## [1,] "the depth" "the" "depth"
## [2,] "a well"    "a"   "well" 
## 
## [[4]]
##      [,1]        [,2] [,3]     
## [1,] "a chicken" "a"  "chicken"
## [2,] "a rare"    "a"  "rare"   
## 
## [[5]]
##      [,1] [,2] [,3]
str_replace(string = string, pattern = "data", replacement = "text")
## [1] "text analytics in useful"                                
## [2] "business analytics is helpful"                           
## [3] "visualization of text is interesting for data scientists"
str_replace_all(string = string, pattern = "data", replacement = "text")
## [1] "text analytics in useful"                                
## [2] "business analytics is helpful"                           
## [3] "visualization of text is interesting for text scientists"

str_split(), str_length(), str_count()

str_split(string, " ")
## [[1]]
## [1] "data"      "analytics" "in"        "useful"   
## 
## [[2]]
## [1] "business"  "analytics" "is"        "helpful"  
## 
## [[3]]
## [1] "visualization" "of"            "data"          "is"           
## [5] "interesting"   "for"           "data"          "scientists"
unlist(str_split(string, " "))
##  [1] "data"          "analytics"     "in"            "useful"       
##  [5] "business"      "analytics"     "is"            "helpful"      
##  [9] "visualization" "of"            "data"          "is"           
## [13] "interesting"   "for"           "data"          "scientists"
unique(unlist(str_split(string, " ")))
##  [1] "data"          "analytics"     "in"            "useful"       
##  [5] "business"      "is"            "helpful"       "visualization"
##  [9] "of"            "interesting"   "for"           "scientists"
str_split(string, " ", n = 3)
## [[1]]
## [1] "data"      "analytics" "in useful"
## 
## [[2]]
## [1] "business"   "analytics"  "is helpful"
## 
## [[3]]
## [1] "visualization"                          
## [2] "of"                                     
## [3] "data is interesting for data scientists"
str_split(string, " ", n = 3, simplify = TRUE)
##      [,1]            [,2]        [,3]                                     
## [1,] "data"          "analytics" "in useful"                              
## [2,] "business"      "analytics" "is helpful"                             
## [3,] "visualization" "of"        "data is interesting for data scientists"
str_length(string)
## [1] 24 29 56
str_count(string, "data")
## [1] 1 0 2
str_count(string, "\\w+")
## [1] 4 4 8

str_pad(), str_trim()

str_pad(string = c("a", "abc", "abcde"),
        width = 6,
        side = "left",
        pad = " ")
## [1] "     a" "   abc" " abcde"
mon <- 1:12
str_pad(mon, width = 2, side = "left", pad = "0")
##  [1] "01" "02" "03" "04" "05" "06" "07" "08" "09" "10" "11" "12"
string <- c("data analytics in useful",
            "business analytics is helpful",
            "visualization of data is interesting for data scientists")
str.pad <- str_pad(string,
                   width = max(str_length(string)),
                   side = "both",
                   pad = " ")
str.pad
## [1] "                data analytics in useful                "
## [2] "             business analytics is helpful              "
## [3] "visualization of data is interesting for data scientists"
str_trim(str.pad, side = "both")
## [1] "data analytics in useful"                                
## [2] "business analytics is helpful"                           
## [3] "visualization of data is interesting for data scientists"

str_c(), str_sub()

str_c("data", "mining", sep = " ")
## [1] "data mining"
str.mining <- str_c(c("data mining", "text mining"),
                    "is useful",
                    sep = " ")
str.mining
## [1] "data mining is useful" "text mining is useful"
str_c(str.mining, collapse = "; ")
## [1] "data mining is useful; text mining is useful"
str_c(str.mining, collapse = "\n")
## [1] "data mining is useful\ntext mining is useful"
cat(str_c(str.mining, collapse = "\n"))
## data mining is useful
## text mining is useful
str_sub(string = str.mining, start = 1, end = 4)
## [1] "data" "text"
str_sub(str.mining, 5, 5)
## [1] " " " "
str_sub(str.mining, 5, 5) <- "-"
str.mining
## [1] "data-mining is useful" "text-mining is useful"
str_sub("abcdefg", start = -2)
## [1] "fg"
str_sub("abcdefg", end = -3)
## [1] "abcde"

파일 읽기

Sample Files : https://github.com/kykwahk/YouTube

read.csv(), read.table(), read.fwf()

# list.files("rBasicLec")
# library(pander)
# openFileInOS("C:/Users/jacea/workspaceR/RPubs/rBasicLec/product.csv")

read.csv("rBasicLec/product.csv")
##     id     name price
## 1 A001    Mouse 30000
## 2 A002 Keyboard 90000
## 3 A003      USB 50000
read.csv("rBasicLec/product-with-no-header.csv", header = FALSE)
##     V1       V2    V3
## 1 A001    Mouse 30000
## 2 A002 Keyboard 90000
## 3 A003      USB 50000
p <- read.csv("rBasicLec/product.csv")
str(p)
## 'data.frame':    3 obs. of  3 variables:
##  $ id   : chr  "A001" "A002" "A003"
##  $ name : chr  "Mouse" "Keyboard" "USB"
##  $ price: int  30000 90000 50000
read.table("rBasicLec/product.txt")
##     V1       V2    V3
## 1   id     name price
## 2 A001    Mouse 30000
## 3 A002 Keyboard 90000
## 4 A003      USB 50000
p <- read.table("rBasicLec/product.txt", header = TRUE)
str(p)
## 'data.frame':    3 obs. of  3 variables:
##  $ id   : chr  "A001" "A002" "A003"
##  $ name : chr  "Mouse" "Keyboard" "USB"
##  $ price: int  30000 90000 50000
p <- read.table("rBasicLec/product.txt",
                header = TRUE, stringsAsFactors = FALSE)
str(p)
## 'data.frame':    3 obs. of  3 variables:
##  $ id   : chr  "A001" "A002" "A003"
##  $ name : chr  "Mouse" "Keyboard" "USB"
##  $ price: int  30000 90000 50000
p <- read.table("rBasicLec/product-colon.txt",
                sep = ":",
                header = TRUE,
                stringsAsFactors = FALSE)
str(p)
## 'data.frame':    3 obs. of  3 variables:
##  $ id   : chr  "A001" "A002" "A003"
##  $ name : chr  " Mouse" " Keyboard" " USB"
##  $ price: int  30000 90000 50000
p <- read.table("rBasicLec/product-missing.txt",
                header = TRUE)
str(p)
## 'data.frame':    3 obs. of  3 variables:
##  $ id   : chr  "A001" "A002" "A003"
##  $ name : chr  "Mouse" "Keyboard" "USB"
##  $ price: chr  "30000" "." "50000"
p <- read.table("rBasicLec/product-missing.txt",
                header = TRUE,
                na.strings = ".")
str(p)
## 'data.frame':    3 obs. of  3 variables:
##  $ id   : chr  "A001" "A002" "A003"
##  $ name : chr  "Mouse" "Keyboard" "USB"
##  $ price: int  30000 NA 50000
p <- read.fwf("rBasicLec/product-fwf.txt",
                widths = c(4, -1, 10, 8),
                col.names = c("id", "name", "price"))
str(p)
## 'data.frame':    3 obs. of  3 variables:
##  $ id   : chr  "A001" "A002" "A003"
##  $ name : chr  "Mouse     " "Keyboard  " "USB       "
##  $ price: int  30000 90000 50000

readLines(), scan()

readLines("rBasicLec/won-dollar.txt")
## [1] "2014-11-27 1116.70 1078.30 2014-11-28 1127.89 1089.11"                           
## [2] "2014-12-01 1130.13 1091.27 2014-12-02 1130.13 1091.27 2014-12-03 1131.86 1092.94"
## [3] "2014-12-04 1134.51 1095.49"                                                      
## [4] "2014-12-05 1134.51 1095.49 2014-12-08 1139.60 1100.40"                           
## [5] "2014-12-09 1134.51 1095.49 2014-12-10 1121.79 1083.21"
readLines("rBasicLec/won-dollar.txt", n = 2)
## [1] "2014-11-27 1116.70 1078.30 2014-11-28 1127.89 1089.11"                           
## [2] "2014-12-01 1130.13 1091.27 2014-12-02 1130.13 1091.27 2014-12-03 1131.86 1092.94"
scan("rBasicLec/won-dollar.txt", what = character())
##  [1] "2014-11-27" "1116.70"    "1078.30"    "2014-11-28" "1127.89"   
##  [6] "1089.11"    "2014-12-01" "1130.13"    "1091.27"    "2014-12-02"
## [11] "1130.13"    "1091.27"    "2014-12-03" "1131.86"    "1092.94"   
## [16] "2014-12-04" "1134.51"    "1095.49"    "2014-12-05" "1134.51"   
## [21] "1095.49"    "2014-12-08" "1139.60"    "1100.40"    "2014-12-09"
## [26] "1134.51"    "1095.49"    "2014-12-10" "1121.79"    "1083.21"
scan("rBasicLec/won-dollar.txt",
     what = list(character(),
                 numeric(),
                 numeric()))
## [[1]]
##  [1] "2014-11-27" "2014-11-28" "2014-12-01" "2014-12-02" "2014-12-03"
##  [6] "2014-12-04" "2014-12-05" "2014-12-08" "2014-12-09" "2014-12-10"
## 
## [[2]]
##  [1] 1116.70 1127.89 1130.13 1130.13 1131.86 1134.51 1134.51 1139.60 1134.51
## [10] 1121.79
## 
## [[3]]
##  [1] 1078.30 1089.11 1091.27 1091.27 1092.94 1095.49 1095.49 1100.40 1095.49
## [10] 1083.21
scan("rBasicLec/won-dollar.txt",
     what = list(date = character(),
                 buy = numeric(),
                 sell = numeric()),
     nlines = 2)
## $date
## [1] "2014-11-27" "2014-11-28" "2014-12-01" "2014-12-02" "2014-12-03"
## 
## $buy
## [1] 1116.70 1127.89 1130.13 1130.13 1131.86
## 
## $sell
## [1] 1078.30 1089.11 1091.27 1091.27 1092.94
scan("rBasicLec/won-dollar.txt",
     what = list(date = character(),
                 buy = numeric(),
                 sell = numeric()),
     skip = 3)
## $date
## [1] "2014-12-05" "2014-12-08" "2014-12-09" "2014-12-10"
## 
## $buy
## [1] 1134.51 1139.60 1134.51 1121.79
## 
## $sell
## [1] 1095.49 1100.40 1095.49 1083.21

library(openxlsx)

library(openxlsx)

read.xlsx("rBasicLec/product.xlsx",
          colNames = TRUE,
          sheet = 1)
##     id     name price madein
## 1 A001    Mouse 30000     KR
## 2 A002 Keyboard 90000     CN
## 3 A003      USB 50000     US

날짜

Sys.Date(), date(), Sys.time(), weekdays()

Sys.Date()
## [1] "2020-07-31"
class(Sys.Date())
## [1] "Date"
date()
## [1] "Fri Jul 31 23:30:28 2020"
class(date())
## [1] "character"
Sys.time()
## [1] "2020-07-31 23:30:28 KST"
class(Sys.time())
## [1] "POSIXct" "POSIXt"
as.Date("2025-12-31")
## [1] "2025-12-31"
as.Date("2020/11/02")
## [1] "2020-11-02"
# Pass the parsing pattern through the named `format` argument.
as.Date("12/3/2021", format = "%m/%d/%Y")
## [1] "2021-12-03"
# ?strptime

d <- as.Date("2025-12-31")
format(d, format = "%m/%d/%Y")
## [1] "12/31/2025"
today <- Sys.Date()
format(today, format = "%Y/%m/%d")
## [1] "2020/07/31"
format(today, format = "%Y/%m/%d %A")
## [1] "2020/07/31 금요일"
format(today, format = "%Y/%m/%d %a")
## [1] "2020/07/31 금"
d <- as.Date("2025-12-31")
weekdays(d)
## [1] "수요일"
d + 7
## [1] "2026-01-07"
d + 1:7
## [1] "2026-01-01" "2026-01-02" "2026-01-03" "2026-01-04" "2026-01-05"
## [6] "2026-01-06" "2026-01-07"
weekdays(d + 1:7)
## [1] "목요일" "금요일" "토요일" "일요일" "월요일" "화요일" "수요일"
start <- as.Date("2025-01-01")
end <- as.Date("2025-01-31")
seq(from = start, to = end, by = 1)
##  [1] "2025-01-01" "2025-01-02" "2025-01-03" "2025-01-04" "2025-01-05"
##  [6] "2025-01-06" "2025-01-07" "2025-01-08" "2025-01-09" "2025-01-10"
## [11] "2025-01-11" "2025-01-12" "2025-01-13" "2025-01-14" "2025-01-15"
## [16] "2025-01-16" "2025-01-17" "2025-01-18" "2025-01-19" "2025-01-20"
## [21] "2025-01-21" "2025-01-22" "2025-01-23" "2025-01-24" "2025-01-25"
## [26] "2025-01-26" "2025-01-27" "2025-01-28" "2025-01-29" "2025-01-30"
## [31] "2025-01-31"
seq(from = start, by = 1, length.out = 7)
## [1] "2025-01-01" "2025-01-02" "2025-01-03" "2025-01-04" "2025-01-05"
## [6] "2025-01-06" "2025-01-07"
seq(from = start, by = "7 days", length.out = 7)
## [1] "2025-01-01" "2025-01-08" "2025-01-15" "2025-01-22" "2025-01-29"
## [6] "2025-02-05" "2025-02-12"
seq(from = start, by = "week", length.out = 7)
## [1] "2025-01-01" "2025-01-08" "2025-01-15" "2025-01-22" "2025-01-29"
## [6] "2025-02-05" "2025-02-12"
seq(from = start, by = "month", length.out = 12)
##  [1] "2025-01-01" "2025-02-01" "2025-03-01" "2025-04-01" "2025-05-01"
##  [6] "2025-06-01" "2025-07-01" "2025-08-01" "2025-09-01" "2025-10-01"
## [11] "2025-11-01" "2025-12-01"
seq(from = start, by = "3 months", length.out = 4)
## [1] "2025-01-01" "2025-04-01" "2025-07-01" "2025-10-01"
seq(from = start, by = "year", length.out = 10)
##  [1] "2025-01-01" "2026-01-01" "2027-01-01" "2028-01-01" "2029-01-01"
##  [6] "2030-01-01" "2031-01-01" "2032-01-01" "2033-01-01" "2034-01-01"
seq(from = as.Date("2025-01-30"),
    by = "month",
    length.out = 6)
## [1] "2025-01-30" "2025-03-02" "2025-03-30" "2025-04-30" "2025-05-30"
## [6] "2025-06-30"

months(), quarters(), Sys.setlocale()

start <- as.Date("2025-01-01")
qrt <- seq(from = start, by = "3 months", length.out = 4)
start
## [1] "2025-01-01"
qrt
## [1] "2025-01-01" "2025-04-01" "2025-07-01" "2025-10-01"
months(qrt)
## [1] "1월"  "4월"  "7월"  "10월"
quarters(qrt)
## [1] "Q1" "Q2" "Q3" "Q4"
Sys.getlocale()
## [1] "LC_COLLATE=Korean_Korea.949;LC_CTYPE=Korean_Korea.949;LC_MONETARY=Korean_Korea.949;LC_NUMERIC=C;LC_TIME=Korean_Korea.949"
Sys.setlocale("LC_TIME", "C")
## [1] "C"
months(qrt)
## [1] "January" "April"   "July"    "October"
Sys.setlocale("LC_TIME", "Korean_Korea.949")
## [1] "Korean_Korea.949"
months(qrt)
## [1] "1월"  "4월"  "7월"  "10월"
Sys.setlocale()
## [1] "LC_COLLATE=Korean_Korea.949;LC_CTYPE=Korean_Korea.949;LC_MONETARY=Korean_Korea.949;LC_NUMERIC=C;LC_TIME=Korean_Korea.949"

as.POSIXct(), as.POSIXlt(), strptime()

# POSIXct holds the date-time as a single number (see as.integer(pct) below).
# The parsing pattern is supplied through the named `format` argument.
pct <- as.POSIXct("2025/03/15, 15:03:02",
                  format = "%Y/%m/%d, %H:%M:%S",
                  tz = "Asia/Seoul")
pct
## [1] "2025-03-15 15:03:02 KST"
class(pct)
## [1] "POSIXct" "POSIXt"
as.integer(pct)
## [1] 1742018582
# POSIXlt holds the date-time as a list of components (see unclass(plt) below).
# The parsing pattern is supplied through the named `format` argument.
plt <- as.POSIXlt("2025/03/15, 15:03:02",
                  format = "%Y/%m/%d, %H:%M:%S",
                  tz = "Asia/Seoul")
plt
## [1] "2025-03-15 15:03:02 KST"
class(plt)
## [1] "POSIXlt" "POSIXt"
as.integer(plt)
## Warning: 강제형변환에 의해 생성된 NA 입니다
##  [1]   2   3  15  15   2 125   6  73   0  NA  NA
unclass(plt)
## $sec
## [1] 2
## 
## $min
## [1] 3
## 
## $hour
## [1] 15
## 
## $mday
## [1] 15
## 
## $mon
## [1] 2
## 
## $year
## [1] 125
## 
## $wday
## [1] 6
## 
## $yday
## [1] 73
## 
## $isdst
## [1] 0
## 
## $zone
## [1] "KST"
## 
## $gmtoff
## [1] NA
## 
## attr(,"tzone")
## [1] "Asia/Seoul"
plt$mday
## [1] 15
plt$mon
## [1] 2
plt$year
## [1] 125
plt$wday
## [1] 6
plt$hour
## [1] 15
dposix <- as.Date("2025-12-31")
dposix
## [1] "2025-12-31"
as.POSIXlt(dposix)$wday
## [1] 3
as.POSIXlt(dposix)$yday
## [1] 364
as.POSIXlt(dposix)$year + 1900
## [1] 2025
as.POSIXlt(dposix)$mon + 1
## [1] 12
strptime("2025-12-31", format="%Y-%m-%d")
## [1] "2025-12-31 KST"
class(strptime("2025-12-31", format="%Y-%m-%d"))
## [1] "POSIXlt" "POSIXt"
strptime("2025-12-31", format="%Y-%m-%d")$year + 1900
## [1] 2025

format(), ISOdate(), difftime()

# Apollo 11 landing time in UTC; the parsing pattern goes in the named
# `format` argument.
moon <- as.POSIXct("1969/07/20, 20:17:39",
                   format = "%Y/%m/%d, %H:%M:%S",
                   tz = "UTC")
moon
## [1] "1969-07-20 20:17:39 UTC"
format(moon, "The time of the Apollo moon landing was %Y/%m/%d, at %H:%M:%S.")
## [1] "The time of the Apollo moon landing was 1969/07/20, at 20:17:39."
y <- 2020
m <- 12
d <- 31
ISOdate(y, m, d)
## [1] "2020-12-31 12:00:00 GMT"
class(ISOdate(y, m, d))
## [1] "POSIXct" "POSIXt"
as.Date(ISOdate(y, m, d))
## [1] "2020-12-31"
years <- c(2025, 2026, 2027, 2028)
months <- c(1, 4, 7, 10)
days <- c(12, 19, 25, 17)
ISOdate(years, months, days)
## [1] "2025-01-12 12:00:00 GMT" "2026-04-19 12:00:00 GMT"
## [3] "2027-07-25 12:00:00 GMT" "2028-10-17 12:00:00 GMT"
jdate <- as.Date("2025-12-31")
jdate
## [1] "2025-12-31"
as.integer(jdate)
## [1] 20453
julian(jdate)
## [1] 20453
## attr(,"origin")
## [1] "1970-01-01"
as.integer(as.Date("1970-01-01"))
## [1] 0
as.integer(as.Date("1969-12-31"))
## [1] -1
as.integer(as.Date("1970-01-02"))
## [1] 1
as.Date(as.integer(jdate), origin = "1970-01-01")
## [1] "2025-12-31"
class(moon) # POSIXct
## [1] "POSIXct" "POSIXt"
moon + 60 * 60 * 2 # 2시간 후, 초단위로
## [1] "1969-07-20 22:17:39 UTC"
moon + 60 * 60 * 24 * 7
## [1] "1969-07-27 20:17:39 UTC"
moon - 60 * 60 * 24 * 7
## [1] "1969-07-13 20:17:39 UTC"
as.Date(moon) + 7
## [1] "1969-07-27"
start <- as.Date("1988-09-17")
end <- as.Date("2018-02-09")
start
## [1] "1988-09-17"
end
## [1] "2018-02-09"
end - start
## Time difference of 10737 days
today <- Sys.Date()
dooly <- as.Date("1983-04-22")
difftime(today, dooly, units = "days")
## Time difference of 13615 days
difftime(today, dooly, units = "weeks")
## Time difference of 1945 weeks
class(moon)
## [1] "POSIXct" "POSIXt"
Sys.time() > moon
## [1] TRUE
Sys.Date() > as.Date(moon)
## [1] TRUE

함수

function(), ls(), rm()

rm(list = ls())
# Convert lengths to metres: multiply x by 0.9144 (metres per yard, so x is
# presumably given in yards), round to one decimal place and append "m".
# Works element-wise on a numeric vector and returns a character vector.
# This version hands the result back with an explicit return().
transLength <- function(x) {
  tlength <- round(x * 0.9144, digits = 1)
  result <- paste(tlength, "m", sep = "")
  return(result)
}
ls()
## [1] "transLength"
y <- c(100, 150, 200)
transLength(y)
## [1] "91.4m"  "137.2m" "182.9m"
trans2 <- transLength
trans2
## function(x) {
##   tlength <- round(x * 0.9144, digits = 1)
##   result <- paste(tlength, "m", sep = "")
##   return(result)
## }
trans2(y)
## [1] "91.4m"  "137.2m" "182.9m"
# Same conversion without return(): the final statement is an assignment, so
# the value is still returned, but invisibly. A bare call prints nothing;
# wrap it in print() (or assign it) to see the result.
transLength <- function(x) {
  metres <- round(x * 0.9144, digits = 1)
  result <- paste0(metres, "m")
}
transLength(y)
print(transLength(y))
## [1] "91.4m"  "137.2m" "182.9m"
# Same conversion relying on the implicit return: the value of the last
# evaluated expression is the function's (visible) result.
transLength <- function(x) {
  metres <- round(x * 0.9144, digits = 1)
  paste0(metres, "m")
}
transLength(y)
## [1] "91.4m"  "137.2m" "182.9m"
# Conversion with an input check: numeric input is converted to metres as
# before; anything else yields the string "Not a Number" instead of failing.
transLength <- function(x) {
  if (is.numeric(x)) {
    metres <- round(x * 0.9144, digits = 1)
    paste0(metres, "m")
  } else {
    "Not a Number"
  }
}
transLength("ABC")
## [1] "Not a Number"
# f1 and f2 are equivalent: when the body is a single expression the braces
# are optional. Both return the sum of their two arguments.
f1 <- function(x, y) {x + y}
f2 <- function(x, y) x + y
f1(1, 3)
## [1] 4
f2(1, 3)
## [1] 4
# One-line form of the converter: a single-expression body needs no braces.
transLength <- function(x) paste0(round(x * 0.9144, digits = 1), "m")
transLength(y)
## [1] "91.4m"  "137.2m" "182.9m"
# Generalised converter: the multiplier and the unit label are arguments.
# Neither has a default, so both must be supplied on every call.
transLength <- function(x, mult, unit) {
  scaled <- x * mult
  paste0(round(scaled, digits = 1), unit)
}
transLength(y, mult = 3, unit = "ft")
## [1] "300ft" "450ft" "600ft"
transLength(y, mult = 36, unit = "in")
## [1] "3600in" "5400in" "7200in"
# transLength(y) # ERROR!

# Converter with default arguments: called with x alone it converts to metres
# (mult = 0.9144, unit = "m"); other units are selected by overriding both.
transLength <- function(x, mult = 0.9144, unit = "m") {
  rounded <- round(x * mult, digits = 1)
  paste0(rounded, unit)
}
transLength(y)
## [1] "91.4m"  "137.2m" "182.9m"
transLength(y, mult = 3, unit = "ft")
## [1] "300ft" "450ft" "600ft"
transLength(y, 3, "ft")
## [1] "300ft" "450ft" "600ft"
# Converter that forwards any extra arguments (...) to round(), e.g. digits.
# With nothing forwarded, round() uses its own default of zero decimals.
transLength <- function(x, mult = 0.9144, unit = "m", ...) {
  rounded <- round(x * mult, ...)
  paste0(rounded, unit)
}
transLength(y, digits = 2)
## [1] "91.44m"  "137.16m" "182.88m"
transLength(y)
## [1] "91m"  "137m" "183m"
# Converter with an explicit `digits` argument (default 1) instead of `...`,
# so a plain call keeps one decimal place while callers can still override it.
transLength <- function(x, mult = 0.9144, unit = "m", digits = 1) {
  rounded <- round(x * mult, digits = digits)
  paste0(rounded, unit)
}
transLength(y, digits = 2)
## [1] "91.44m"  "137.16m" "182.88m"
transLength(y)
## [1] "91.4m"  "137.2m" "182.9m"
# Converter whose rounding step is itself an argument: FUN (default round) is
# applied to the scaled values, and any extra arguments (...) are passed on
# to FUN. The unit label is appended to whatever FUN returns.
transLength <- function(x, mult = 0.9144, unit = "m", FUN = round, ...) {
  converted <- FUN(x * mult, ...)
  paste0(converted, unit)
}
transLength(y, FUN = signif, digits = 3)
## [1] "91.4m" "137m"  "183m"
transLength(y, FUN = floor)
## [1] "91m"  "137m" "182m"
transLength(y)
## [1] "91m"  "137m" "183m"
x <- 11:15
# Scoping demonstration. Inside the function, x first refers to the argument.
# rm(x) removes that local binding, so the next lookup of x falls through to
# the environment where the function was defined and finds the x created
# outside the function. Output is written with cat(); nothing useful is
# returned.
scopetest <- function(x) {
  cat("This is x: ", x, "\n")
  rm(x)  # removes only the function-local x
  cat("This is x after removing x", x, "\n")
}
scopetest(x = 15:11)
## This is x:  15 14 13 12 11 
## This is x after removing x 11 12 13 14 15

논리흐름 제어

if(), ifelse()

x <- pi
y <- 3
if (x > y) x
## [1] 3.141593
if (x < y) x

if (x < y) x else y
## [1] 3
x <- pi
y <- 1:5
if (x < y) x else y
## Warning in if (x < y) x else y: length > 1 이라는 조건이 있고, 첫번째 요소만이
## 사용될 것입니다
## [1] 1 2 3 4 5
if (x > y) x else y
## Warning in if (x > y) x else y: length > 1 이라는 조건이 있고, 첫번째 요소만이
## 사용될 것입니다
## [1] 3.141593
test <- c(TRUE, FALSE, TRUE, TRUE, FALSE)
yes <- 1:5
no <- 0
ifelse(test, yes, no)
## [1] 1 0 3 4 0
ifelse(x > y, x, y)
## [1] 3.141593 3.141593 3.141593 4.000000 5.000000

switch()

# Return a measure of central tendency for x, chosen by name through `type`:
#   "mean"    - arithmetic mean
#   "median"  - median
#   "trimmed" - mean after trimming a fraction of 0.1 from each end
# Any other name yields a usage hint string instead of a number.
center <- function(x, type) {
  fallback <- "Choose one of mean, median, and trimmed"
  switch(
    type,
    mean = mean(x),
    median = median(x),
    trimmed = mean(x, trim = 0.1),
    fallback
  )
}
x <- c(2, 3, 5, 7, 11, 13, 17, 19, 23, 29)
center(x, "mean")
## [1] 12.9
center(x, "median")
## [1] 12
center(x, "trimmed")
## [1] 12.25
center(x, "other")
## [1] "Choose one of mean, median, and trimmed"

repeat(), while(), for()

# repeat print("hello") # 무한 반복, 멈추려면 ESC

# repeat has no condition of its own, so the loop must leave via break.
# Prints 5, 10, ..., 25 and stops once i exceeds 25.
i <- 5
repeat {
  if (i > 25) {
    break
  }
  print(i)
  i <- i + 5
}
## [1] 5
## [1] 10
## [1] 15
## [1] 20
## [1] 25
i <- 5
while(i <= 25) {
  print(i)
  i <- i + 5
}
## [1] 5
## [1] 10
## [1] 15
## [1] 20
## [1] 25
for (i in seq(from = 5, to = 25, by = 5)) print(i)
## [1] 5
## [1] 10
## [1] 15
## [1] 20
## [1] 25
for (i in seq(from = 5, to = 25, by = 5)) i

i <- 1
for (i in seq(from = 5, to = 25, by = 5)) i
i
## [1] 25

서브셋

subset()

str(mtcars)
## 'data.frame':    32 obs. of  11 variables:
##  $ mpg : num  21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
##  $ cyl : num  6 6 4 6 8 6 8 4 4 6 ...
##  $ disp: num  160 160 108 258 360 ...
##  $ hp  : num  110 110 93 110 175 105 245 62 95 123 ...
##  $ drat: num  3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
##  $ wt  : num  2.62 2.88 2.32 3.21 3.44 ...
##  $ qsec: num  16.5 17 18.6 19.4 17 ...
##  $ vs  : num  0 0 1 1 0 1 0 1 1 1 ...
##  $ am  : num  1 1 1 0 0 0 0 0 0 0 ...
##  $ gear: num  4 4 4 3 3 3 3 4 4 4 ...
##  $ carb: num  4 4 1 1 2 1 4 2 2 4 ...
mtcars$mpg
##  [1] 21.0 21.0 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2 10.4
## [16] 10.4 14.7 32.4 30.4 33.9 21.5 15.5 15.2 13.3 19.2 27.3 26.0 30.4 15.8 19.7
## [31] 15.0 21.4
mtcars[["mpg"]]
##  [1] 21.0 21.0 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2 10.4
## [16] 10.4 14.7 32.4 30.4 33.9 21.5 15.5 15.2 13.3 19.2 27.3 26.0 30.4 15.8 19.7
## [31] 15.0 21.4
mtcars[[1]]
##  [1] 21.0 21.0 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2 10.4
## [16] 10.4 14.7 32.4 30.4 33.9 21.5 15.5 15.2 13.3 19.2 27.3 26.0 30.4 15.8 19.7
## [31] 15.0 21.4
mtcars[c(1, 4)]
##                      mpg  hp
## Mazda RX4           21.0 110
## Mazda RX4 Wag       21.0 110
## Datsun 710          22.8  93
## Hornet 4 Drive      21.4 110
## Hornet Sportabout   18.7 175
## Valiant             18.1 105
## Duster 360          14.3 245
## Merc 240D           24.4  62
## Merc 230            22.8  95
## Merc 280            19.2 123
## Merc 280C           17.8 123
## Merc 450SE          16.4 180
## Merc 450SL          17.3 180
## Merc 450SLC         15.2 180
## Cadillac Fleetwood  10.4 205
## Lincoln Continental 10.4 215
## Chrysler Imperial   14.7 230
## Fiat 128            32.4  66
## Honda Civic         30.4  52
## Toyota Corolla      33.9  65
## Toyota Corona       21.5  97
## Dodge Challenger    15.5 150
## AMC Javelin         15.2 150
## Camaro Z28          13.3 245
## Pontiac Firebird    19.2 175
## Fiat X1-9           27.3  66
## Porsche 914-2       26.0  91
## Lotus Europa        30.4 113
## Ford Pantera L      15.8 264
## Ferrari Dino        19.7 175
## Maserati Bora       15.0 335
## Volvo 142E          21.4 109
mtcars[c("mpg", "hp")]
##                      mpg  hp
## Mazda RX4           21.0 110
## Mazda RX4 Wag       21.0 110
## Datsun 710          22.8  93
## Hornet 4 Drive      21.4 110
## Hornet Sportabout   18.7 175
## Valiant             18.1 105
## Duster 360          14.3 245
## Merc 240D           24.4  62
## Merc 230            22.8  95
## Merc 280            19.2 123
## Merc 280C           17.8 123
## Merc 450SE          16.4 180
## Merc 450SL          17.3 180
## Merc 450SLC         15.2 180
## Cadillac Fleetwood  10.4 205
## Lincoln Continental 10.4 215
## Chrysler Imperial   14.7 230
## Fiat 128            32.4  66
## Honda Civic         30.4  52
## Toyota Corolla      33.9  65
## Toyota Corona       21.5  97
## Dodge Challenger    15.5 150
## AMC Javelin         15.2 150
## Camaro Z28          13.3 245
## Pontiac Firebird    19.2 175
## Fiat X1-9           27.3  66
## Porsche 914-2       26.0  91
## Lotus Europa        30.4 113
## Ford Pantera L      15.8 264
## Ferrari Dino        19.7 175
## Maserati Bora       15.0 335
## Volvo 142E          21.4 109
mtcars[-c(2, 3, 5, 7:11)]
##                      mpg  hp    wt
## Mazda RX4           21.0 110 2.620
## Mazda RX4 Wag       21.0 110 2.875
## Datsun 710          22.8  93 2.320
## Hornet 4 Drive      21.4 110 3.215
## Hornet Sportabout   18.7 175 3.440
## Valiant             18.1 105 3.460
## Duster 360          14.3 245 3.570
## Merc 240D           24.4  62 3.190
## Merc 230            22.8  95 3.150
## Merc 280            19.2 123 3.440
## Merc 280C           17.8 123 3.440
## Merc 450SE          16.4 180 4.070
## Merc 450SL          17.3 180 3.730
## Merc 450SLC         15.2 180 3.780
## Cadillac Fleetwood  10.4 205 5.250
## Lincoln Continental 10.4 215 5.424
## Chrysler Imperial   14.7 230 5.345
## Fiat 128            32.4  66 2.200
## Honda Civic         30.4  52 1.615
## Toyota Corolla      33.9  65 1.835
## Toyota Corona       21.5  97 2.465
## Dodge Challenger    15.5 150 3.520
## AMC Javelin         15.2 150 3.435
## Camaro Z28          13.3 245 3.840
## Pontiac Firebird    19.2 175 3.845
## Fiat X1-9           27.3  66 1.935
## Porsche 914-2       26.0  91 2.140
## Lotus Europa        30.4 113 1.513
## Ford Pantera L      15.8 264 3.170
## Ferrari Dino        19.7 175 2.770
## Maserati Bora       15.0 335 3.570
## Volvo 142E          21.4 109 2.780
mtcars[-1]
##                     cyl  disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4             6 160.0 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag         6 160.0 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710            4 108.0  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive        6 258.0 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout     8 360.0 175 3.15 3.440 17.02  0  0    3    2
## Valiant               6 225.0 105 2.76 3.460 20.22  1  0    3    1
## Duster 360            8 360.0 245 3.21 3.570 15.84  0  0    3    4
## Merc 240D             4 146.7  62 3.69 3.190 20.00  1  0    4    2
## Merc 230              4 140.8  95 3.92 3.150 22.90  1  0    4    2
## Merc 280              6 167.6 123 3.92 3.440 18.30  1  0    4    4
## Merc 280C             6 167.6 123 3.92 3.440 18.90  1  0    4    4
## Merc 450SE            8 275.8 180 3.07 4.070 17.40  0  0    3    3
## Merc 450SL            8 275.8 180 3.07 3.730 17.60  0  0    3    3
## Merc 450SLC           8 275.8 180 3.07 3.780 18.00  0  0    3    3
## Cadillac Fleetwood    8 472.0 205 2.93 5.250 17.98  0  0    3    4
## Lincoln Continental   8 460.0 215 3.00 5.424 17.82  0  0    3    4
## Chrysler Imperial     8 440.0 230 3.23 5.345 17.42  0  0    3    4
## Fiat 128              4  78.7  66 4.08 2.200 19.47  1  1    4    1
## Honda Civic           4  75.7  52 4.93 1.615 18.52  1  1    4    2
## Toyota Corolla        4  71.1  65 4.22 1.835 19.90  1  1    4    1
## Toyota Corona         4 120.1  97 3.70 2.465 20.01  1  0    3    1
## Dodge Challenger      8 318.0 150 2.76 3.520 16.87  0  0    3    2
## AMC Javelin           8 304.0 150 3.15 3.435 17.30  0  0    3    2
## Camaro Z28            8 350.0 245 3.73 3.840 15.41  0  0    3    4
## Pontiac Firebird      8 400.0 175 3.08 3.845 17.05  0  0    3    2
## Fiat X1-9             4  79.0  66 4.08 1.935 18.90  1  1    4    1
## Porsche 914-2         4 120.3  91 4.43 2.140 16.70  0  1    5    2
## Lotus Europa          4  95.1 113 3.77 1.513 16.90  1  1    5    2
## Ford Pantera L        8 351.0 264 4.22 3.170 14.50  0  1    5    4
## Ferrari Dino          6 145.0 175 3.62 2.770 15.50  0  1    5    6
## Maserati Bora         8 301.0 335 3.54 3.570 14.60  0  1    5    8
## Volvo 142E            4 121.0 109 4.11 2.780 18.60  1  1    4    2
mtcars[1] <- NULL
mtcars
##                     cyl  disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4             6 160.0 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag         6 160.0 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710            4 108.0  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive        6 258.0 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout     8 360.0 175 3.15 3.440 17.02  0  0    3    2
## Valiant               6 225.0 105 2.76 3.460 20.22  1  0    3    1
## Duster 360            8 360.0 245 3.21 3.570 15.84  0  0    3    4
## Merc 240D             4 146.7  62 3.69 3.190 20.00  1  0    4    2
## Merc 230              4 140.8  95 3.92 3.150 22.90  1  0    4    2
## Merc 280              6 167.6 123 3.92 3.440 18.30  1  0    4    4
## Merc 280C             6 167.6 123 3.92 3.440 18.90  1  0    4    4
## Merc 450SE            8 275.8 180 3.07 4.070 17.40  0  0    3    3
## Merc 450SL            8 275.8 180 3.07 3.730 17.60  0  0    3    3
## Merc 450SLC           8 275.8 180 3.07 3.780 18.00  0  0    3    3
## Cadillac Fleetwood    8 472.0 205 2.93 5.250 17.98  0  0    3    4
## Lincoln Continental   8 460.0 215 3.00 5.424 17.82  0  0    3    4
## Chrysler Imperial     8 440.0 230 3.23 5.345 17.42  0  0    3    4
## Fiat 128              4  78.7  66 4.08 2.200 19.47  1  1    4    1
## Honda Civic           4  75.7  52 4.93 1.615 18.52  1  1    4    2
## Toyota Corolla        4  71.1  65 4.22 1.835 19.90  1  1    4    1
## Toyota Corona         4 120.1  97 3.70 2.465 20.01  1  0    3    1
## Dodge Challenger      8 318.0 150 2.76 3.520 16.87  0  0    3    2
## AMC Javelin           8 304.0 150 3.15 3.435 17.30  0  0    3    2
## Camaro Z28            8 350.0 245 3.73 3.840 15.41  0  0    3    4
## Pontiac Firebird      8 400.0 175 3.08 3.845 17.05  0  0    3    2
## Fiat X1-9             4  79.0  66 4.08 1.935 18.90  1  1    4    1
## Porsche 914-2         4 120.3  91 4.43 2.140 16.70  0  1    5    2
## Lotus Europa          4  95.1 113 3.77 1.513 16.90  1  1    5    2
## Ford Pantera L        8 351.0 264 4.22 3.170 14.50  0  1    5    4
## Ferrari Dino          6 145.0 175 3.62 2.770 15.50  0  1    5    6
## Maserati Bora         8 301.0 335 3.54 3.570 14.60  0  1    5    8
## Volvo 142E            4 121.0 109 4.11 2.780 18.60  1  1    4    2
# mtcars[c(-1, 2)] # ERROR

str(iris)
## 'data.frame':    150 obs. of  5 variables:
##  $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##  $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##  $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##  $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
iris[1:5, ]
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
iris[, c("Sepal.Length", "Sepal.Width")]
##     Sepal.Length Sepal.Width
## 1            5.1         3.5
## 2            4.9         3.0
## 3            4.7         3.2
## 4            4.6         3.1
## 5            5.0         3.6
## 6            5.4         3.9
## 7            4.6         3.4
## 8            5.0         3.4
## 9            4.4         2.9
## 10           4.9         3.1
## 11           5.4         3.7
## 12           4.8         3.4
## 13           4.8         3.0
## 14           4.3         3.0
## 15           5.8         4.0
## 16           5.7         4.4
## 17           5.4         3.9
## 18           5.1         3.5
## 19           5.7         3.8
## 20           5.1         3.8
## 21           5.4         3.4
## 22           5.1         3.7
## 23           4.6         3.6
## 24           5.1         3.3
## 25           4.8         3.4
## 26           5.0         3.0
## 27           5.0         3.4
## 28           5.2         3.5
## 29           5.2         3.4
## 30           4.7         3.2
## 31           4.8         3.1
## 32           5.4         3.4
## 33           5.2         4.1
## 34           5.5         4.2
## 35           4.9         3.1
## 36           5.0         3.2
## 37           5.5         3.5
## 38           4.9         3.6
## 39           4.4         3.0
## 40           5.1         3.4
## 41           5.0         3.5
## 42           4.5         2.3
## 43           4.4         3.2
## 44           5.0         3.5
## 45           5.1         3.8
## 46           4.8         3.0
## 47           5.1         3.8
## 48           4.6         3.2
## 49           5.3         3.7
## 50           5.0         3.3
## 51           7.0         3.2
## 52           6.4         3.2
## 53           6.9         3.1
## 54           5.5         2.3
## 55           6.5         2.8
## 56           5.7         2.8
## 57           6.3         3.3
## 58           4.9         2.4
## 59           6.6         2.9
## 60           5.2         2.7
## 61           5.0         2.0
## 62           5.9         3.0
## 63           6.0         2.2
## 64           6.1         2.9
## 65           5.6         2.9
## 66           6.7         3.1
## 67           5.6         3.0
## 68           5.8         2.7
## 69           6.2         2.2
## 70           5.6         2.5
## 71           5.9         3.2
## 72           6.1         2.8
## 73           6.3         2.5
## 74           6.1         2.8
## 75           6.4         2.9
## 76           6.6         3.0
## 77           6.8         2.8
## 78           6.7         3.0
## 79           6.0         2.9
## 80           5.7         2.6
## 81           5.5         2.4
## 82           5.5         2.4
## 83           5.8         2.7
## 84           6.0         2.7
## 85           5.4         3.0
## 86           6.0         3.4
## 87           6.7         3.1
## 88           6.3         2.3
## 89           5.6         3.0
## 90           5.5         2.5
## 91           5.5         2.6
## 92           6.1         3.0
## 93           5.8         2.6
## 94           5.0         2.3
## 95           5.6         2.7
## 96           5.7         3.0
## 97           5.7         2.9
## 98           6.2         2.9
## 99           5.1         2.5
## 100          5.7         2.8
## 101          6.3         3.3
## 102          5.8         2.7
## 103          7.1         3.0
## 104          6.3         2.9
## 105          6.5         3.0
## 106          7.6         3.0
## 107          4.9         2.5
## 108          7.3         2.9
## 109          6.7         2.5
## 110          7.2         3.6
## 111          6.5         3.2
## 112          6.4         2.7
## 113          6.8         3.0
## 114          5.7         2.5
## 115          5.8         2.8
## 116          6.4         3.2
## 117          6.5         3.0
## 118          7.7         3.8
## 119          7.7         2.6
## 120          6.0         2.2
## 121          6.9         3.2
## 122          5.6         2.8
## 123          7.7         2.8
## 124          6.3         2.7
## 125          6.7         3.3
## 126          7.2         3.2
## 127          6.2         2.8
## 128          6.1         3.0
## 129          6.4         2.8
## 130          7.2         3.0
## 131          7.4         2.8
## 132          7.9         3.8
## 133          6.4         2.8
## 134          6.3         2.8
## 135          6.1         2.6
## 136          7.7         3.0
## 137          6.3         3.4
## 138          6.4         3.1
## 139          6.0         3.0
## 140          6.9         3.1
## 141          6.7         3.1
## 142          6.9         3.1
## 143          5.8         2.7
## 144          6.8         3.2
## 145          6.7         3.3
## 146          6.7         3.0
## 147          6.3         2.5
## 148          6.5         3.0
## 149          6.2         3.4
## 150          5.9         3.0
iris[, "Sepal.Length"]
##   [1] 5.1 4.9 4.7 4.6 5.0 5.4 4.6 5.0 4.4 4.9 5.4 4.8 4.8 4.3 5.8 5.7 5.4 5.1
##  [19] 5.7 5.1 5.4 5.1 4.6 5.1 4.8 5.0 5.0 5.2 5.2 4.7 4.8 5.4 5.2 5.5 4.9 5.0
##  [37] 5.5 4.9 4.4 5.1 5.0 4.5 4.4 5.0 5.1 4.8 5.1 4.6 5.3 5.0 7.0 6.4 6.9 5.5
##  [55] 6.5 5.7 6.3 4.9 6.6 5.2 5.0 5.9 6.0 6.1 5.6 6.7 5.6 5.8 6.2 5.6 5.9 6.1
##  [73] 6.3 6.1 6.4 6.6 6.8 6.7 6.0 5.7 5.5 5.5 5.8 6.0 5.4 6.0 6.7 6.3 5.6 5.5
##  [91] 5.5 6.1 5.8 5.0 5.6 5.7 5.7 6.2 5.1 5.7 6.3 5.8 7.1 6.3 6.5 7.6 4.9 7.3
## [109] 6.7 7.2 6.5 6.4 6.8 5.7 5.8 6.4 6.5 7.7 7.7 6.0 6.9 5.6 7.7 6.3 6.7 7.2
## [127] 6.2 6.1 6.4 7.2 7.4 7.9 6.4 6.3 6.1 7.7 6.3 6.4 6.0 6.9 6.7 6.9 5.8 6.8
## [145] 6.7 6.7 6.3 6.5 6.2 5.9
iris[, "Sepal.Length", drop = FALSE]
##     Sepal.Length
## 1            5.1
## 2            4.9
## 3            4.7
## 4            4.6
## 5            5.0
## 6            5.4
## 7            4.6
## 8            5.0
## 9            4.4
## 10           4.9
## 11           5.4
## 12           4.8
## 13           4.8
## 14           4.3
## 15           5.8
## 16           5.7
## 17           5.4
## 18           5.1
## 19           5.7
## 20           5.1
## 21           5.4
## 22           5.1
## 23           4.6
## 24           5.1
## 25           4.8
## 26           5.0
## 27           5.0
## 28           5.2
## 29           5.2
## 30           4.7
## 31           4.8
## 32           5.4
## 33           5.2
## 34           5.5
## 35           4.9
## 36           5.0
## 37           5.5
## 38           4.9
## 39           4.4
## 40           5.1
## 41           5.0
## 42           4.5
## 43           4.4
## 44           5.0
## 45           5.1
## 46           4.8
## 47           5.1
## 48           4.6
## 49           5.3
## 50           5.0
## 51           7.0
## 52           6.4
## 53           6.9
## 54           5.5
## 55           6.5
## 56           5.7
## 57           6.3
## 58           4.9
## 59           6.6
## 60           5.2
## 61           5.0
## 62           5.9
## 63           6.0
## 64           6.1
## 65           5.6
## 66           6.7
## 67           5.6
## 68           5.8
## 69           6.2
## 70           5.6
## 71           5.9
## 72           6.1
## 73           6.3
## 74           6.1
## 75           6.4
## 76           6.6
## 77           6.8
## 78           6.7
## 79           6.0
## 80           5.7
## 81           5.5
## 82           5.5
## 83           5.8
## 84           6.0
## 85           5.4
## 86           6.0
## 87           6.7
## 88           6.3
## 89           5.6
## 90           5.5
## 91           5.5
## 92           6.1
## 93           5.8
## 94           5.0
## 95           5.6
## 96           5.7
## 97           5.7
## 98           6.2
## 99           5.1
## 100          5.7
## 101          6.3
## 102          5.8
## 103          7.1
## 104          6.3
## 105          6.5
## 106          7.6
## 107          4.9
## 108          7.3
## 109          6.7
## 110          7.2
## 111          6.5
## 112          6.4
## 113          6.8
## 114          5.7
## 115          5.8
## 116          6.4
## 117          6.5
## 118          7.7
## 119          7.7
## 120          6.0
## 121          6.9
## 122          5.6
## 123          7.7
## 124          6.3
## 125          6.7
## 126          7.2
## 127          6.2
## 128          6.1
## 129          6.4
## 130          7.2
## 131          7.4
## 132          7.9
## 133          6.4
## 134          6.3
## 135          6.1
## 136          7.7
## 137          6.3
## 138          6.4
## 139          6.0
## 140          6.9
## 141          6.7
## 142          6.9
## 143          5.8
## 144          6.8
## 145          6.7
## 146          6.7
## 147          6.3
## 148          6.5
## 149          6.2
## 150          5.9
iris["Sepal.Length"]
##     Sepal.Length
## 1            5.1
## 2            4.9
## 3            4.7
## 4            4.6
## 5            5.0
## 6            5.4
## 7            4.6
## 8            5.0
## 9            4.4
## 10           4.9
## 11           5.4
## 12           4.8
## 13           4.8
## 14           4.3
## 15           5.8
## 16           5.7
## 17           5.4
## 18           5.1
## 19           5.7
## 20           5.1
## 21           5.4
## 22           5.1
## 23           4.6
## 24           5.1
## 25           4.8
## 26           5.0
## 27           5.0
## 28           5.2
## 29           5.2
## 30           4.7
## 31           4.8
## 32           5.4
## 33           5.2
## 34           5.5
## 35           4.9
## 36           5.0
## 37           5.5
## 38           4.9
## 39           4.4
## 40           5.1
## 41           5.0
## 42           4.5
## 43           4.4
## 44           5.0
## 45           5.1
## 46           4.8
## 47           5.1
## 48           4.6
## 49           5.3
## 50           5.0
## 51           7.0
## 52           6.4
## 53           6.9
## 54           5.5
## 55           6.5
## 56           5.7
## 57           6.3
## 58           4.9
## 59           6.6
## 60           5.2
## 61           5.0
## 62           5.9
## 63           6.0
## 64           6.1
## 65           5.6
## 66           6.7
## 67           5.6
## 68           5.8
## 69           6.2
## 70           5.6
## 71           5.9
## 72           6.1
## 73           6.3
## 74           6.1
## 75           6.4
## 76           6.6
## 77           6.8
## 78           6.7
## 79           6.0
## 80           5.7
## 81           5.5
## 82           5.5
## 83           5.8
## 84           6.0
## 85           5.4
## 86           6.0
## 87           6.7
## 88           6.3
## 89           5.6
## 90           5.5
## 91           5.5
## 92           6.1
## 93           5.8
## 94           5.0
## 95           5.6
## 96           5.7
## 97           5.7
## 98           6.2
## 99           5.1
## 100          5.7
## 101          6.3
## 102          5.8
## 103          7.1
## 104          6.3
## 105          6.5
## 106          7.6
## 107          4.9
## 108          7.3
## 109          6.7
## 110          7.2
## 111          6.5
## 112          6.4
## 113          6.8
## 114          5.7
## 115          5.8
## 116          6.4
## 117          6.5
## 118          7.7
## 119          7.7
## 120          6.0
## 121          6.9
## 122          5.6
## 123          7.7
## 124          6.3
## 125          6.7
## 126          7.2
## 127          6.2
## 128          6.1
## 129          6.4
## 130          7.2
## 131          7.4
## 132          7.9
## 133          6.4
## 134          6.3
## 135          6.1
## 136          7.7
## 137          6.3
## 138          6.4
## 139          6.0
## 140          6.9
## 141          6.7
## 142          6.9
## 143          5.8
## 144          6.8
## 145          6.7
## 146          6.7
## 147          6.3
## 148          6.5
## 149          6.2
## 150          5.9
iris[1:5, c("Sepal.Length", "Sepal.Width")]
##   Sepal.Length Sepal.Width
## 1          5.1         3.5
## 2          4.9         3.0
## 3          4.7         3.2
## 4          4.6         3.1
## 5          5.0         3.6
iris[iris$Sepal.Length > 7,]
##     Sepal.Length Sepal.Width Petal.Length Petal.Width   Species
## 103          7.1         3.0          5.9         2.1 virginica
## 106          7.6         3.0          6.6         2.1 virginica
## 108          7.3         2.9          6.3         1.8 virginica
## 110          7.2         3.6          6.1         2.5 virginica
## 118          7.7         3.8          6.7         2.2 virginica
## 119          7.7         2.6          6.9         2.3 virginica
## 123          7.7         2.8          6.7         2.0 virginica
## 126          7.2         3.2          6.0         1.8 virginica
## 130          7.2         3.0          5.8         1.6 virginica
## 131          7.4         2.8          6.1         1.9 virginica
## 132          7.9         3.8          6.4         2.0 virginica
## 136          7.7         3.0          6.1         2.3 virginica
iris[iris$Sepal.Length > 7, c("Sepal.Length", "Sepal.Width", "Species")]
##     Sepal.Length Sepal.Width   Species
## 103          7.1         3.0 virginica
## 106          7.6         3.0 virginica
## 108          7.3         2.9 virginica
## 110          7.2         3.6 virginica
## 118          7.7         3.8 virginica
## 119          7.7         2.6 virginica
## 123          7.7         2.8 virginica
## 126          7.2         3.2 virginica
## 130          7.2         3.0 virginica
## 131          7.4         2.8 virginica
## 132          7.9         3.8 virginica
## 136          7.7         3.0 virginica
subset(iris,
       subset = (Sepal.Length > 7),
       select = c("Sepal.Length", "Sepal.Width", "Species"))
##     Sepal.Length Sepal.Width   Species
## 103          7.1         3.0 virginica
## 106          7.6         3.0 virginica
## 108          7.3         2.9 virginica
## 110          7.2         3.6 virginica
## 118          7.7         3.8 virginica
## 119          7.7         2.6 virginica
## 123          7.7         2.8 virginica
## 126          7.2         3.2 virginica
## 130          7.2         3.0 virginica
## 131          7.4         2.8 virginica
## 132          7.9         3.8 virginica
## 136          7.7         3.0 virginica

sample(), set.seed()

sample(x = 1:10, size = 5)
## [1] 9 4 3 1 6
sample(x = 10, size = 5)
## [1]  9  1  7 10  6
sample(x = 10, size = 5, replace = TRUE)
## [1] 1 4 5 9 7
sample(10)
##  [1]  4  5  8 10  1  2  3  9  6  7
set.seed(1)
sample(x = 10, size = 5, replace = TRUE)
## [1] 9 4 7 1 2
sample(x = 10, size = 5, replace = TRUE)
## [1] 7 2 3 1 5
set.seed(1)
sample(x = 10, size = 5, replace = TRUE)
## [1] 9 4 7 1 2
# Note: a data frame is a list of columns, so sample() draws 3 of its columns
# here (every row is kept, as the output shows); it does not draw 3 rows.
sample(iris, 3)
##        Species Petal.Length Petal.Width
## 1       setosa          1.4         0.2
## 2       setosa          1.4         0.2
## 3       setosa          1.3         0.2
## 4       setosa          1.5         0.2
## 5       setosa          1.4         0.2
## 6       setosa          1.7         0.4
## 7       setosa          1.4         0.3
## 8       setosa          1.5         0.2
## 9       setosa          1.4         0.2
## 10      setosa          1.5         0.1
## 11      setosa          1.5         0.2
## 12      setosa          1.6         0.2
## 13      setosa          1.4         0.1
## 14      setosa          1.1         0.1
## 15      setosa          1.2         0.2
## 16      setosa          1.5         0.4
## 17      setosa          1.3         0.4
## 18      setosa          1.4         0.3
## 19      setosa          1.7         0.3
## 20      setosa          1.5         0.3
## 21      setosa          1.7         0.2
## 22      setosa          1.5         0.4
## 23      setosa          1.0         0.2
## 24      setosa          1.7         0.5
## 25      setosa          1.9         0.2
## 26      setosa          1.6         0.2
## 27      setosa          1.6         0.4
## 28      setosa          1.5         0.2
## 29      setosa          1.4         0.2
## 30      setosa          1.6         0.2
## 31      setosa          1.6         0.2
## 32      setosa          1.5         0.4
## 33      setosa          1.5         0.1
## 34      setosa          1.4         0.2
## 35      setosa          1.5         0.2
## 36      setosa          1.2         0.2
## 37      setosa          1.3         0.2
## 38      setosa          1.4         0.1
## 39      setosa          1.3         0.2
## 40      setosa          1.5         0.2
## 41      setosa          1.3         0.3
## 42      setosa          1.3         0.3
## 43      setosa          1.3         0.2
## 44      setosa          1.6         0.6
## 45      setosa          1.9         0.4
## 46      setosa          1.4         0.3
## 47      setosa          1.6         0.2
## 48      setosa          1.4         0.2
## 49      setosa          1.5         0.2
## 50      setosa          1.4         0.2
## 51  versicolor          4.7         1.4
## 52  versicolor          4.5         1.5
## 53  versicolor          4.9         1.5
## 54  versicolor          4.0         1.3
## 55  versicolor          4.6         1.5
## 56  versicolor          4.5         1.3
## 57  versicolor          4.7         1.6
## 58  versicolor          3.3         1.0
## 59  versicolor          4.6         1.3
## 60  versicolor          3.9         1.4
## 61  versicolor          3.5         1.0
## 62  versicolor          4.2         1.5
## 63  versicolor          4.0         1.0
## 64  versicolor          4.7         1.4
## 65  versicolor          3.6         1.3
## 66  versicolor          4.4         1.4
## 67  versicolor          4.5         1.5
## 68  versicolor          4.1         1.0
## 69  versicolor          4.5         1.5
## 70  versicolor          3.9         1.1
## 71  versicolor          4.8         1.8
## 72  versicolor          4.0         1.3
## 73  versicolor          4.9         1.5
## 74  versicolor          4.7         1.2
## 75  versicolor          4.3         1.3
## 76  versicolor          4.4         1.4
## 77  versicolor          4.8         1.4
## 78  versicolor          5.0         1.7
## 79  versicolor          4.5         1.5
## 80  versicolor          3.5         1.0
## 81  versicolor          3.8         1.1
## 82  versicolor          3.7         1.0
## 83  versicolor          3.9         1.2
## 84  versicolor          5.1         1.6
## 85  versicolor          4.5         1.5
## 86  versicolor          4.5         1.6
## 87  versicolor          4.7         1.5
## 88  versicolor          4.4         1.3
## 89  versicolor          4.1         1.3
## 90  versicolor          4.0         1.3
## 91  versicolor          4.4         1.2
## 92  versicolor          4.6         1.4
## 93  versicolor          4.0         1.2
## 94  versicolor          3.3         1.0
## 95  versicolor          4.2         1.3
## 96  versicolor          4.2         1.2
## 97  versicolor          4.2         1.3
## 98  versicolor          4.3         1.3
## 99  versicolor          3.0         1.1
## 100 versicolor          4.1         1.3
## 101  virginica          6.0         2.5
## 102  virginica          5.1         1.9
## 103  virginica          5.9         2.1
## 104  virginica          5.6         1.8
## 105  virginica          5.8         2.2
## 106  virginica          6.6         2.1
## 107  virginica          4.5         1.7
## 108  virginica          6.3         1.8
## 109  virginica          5.8         1.8
## 110  virginica          6.1         2.5
## 111  virginica          5.1         2.0
## 112  virginica          5.3         1.9
## 113  virginica          5.5         2.1
## 114  virginica          5.0         2.0
## 115  virginica          5.1         2.4
## 116  virginica          5.3         2.3
## 117  virginica          5.5         1.8
## 118  virginica          6.7         2.2
## 119  virginica          6.9         2.3
## 120  virginica          5.0         1.5
## 121  virginica          5.7         2.3
## 122  virginica          4.9         2.0
## 123  virginica          6.7         2.0
## 124  virginica          4.9         1.8
## 125  virginica          5.7         2.1
## 126  virginica          6.0         1.8
## 127  virginica          4.8         1.8
## 128  virginica          4.9         1.8
## 129  virginica          5.6         2.1
## 130  virginica          5.8         1.6
## 131  virginica          6.1         1.9
## 132  virginica          6.4         2.0
## 133  virginica          5.6         2.2
## 134  virginica          5.1         1.5
## 135  virginica          5.6         1.4
## 136  virginica          6.1         2.3
## 137  virginica          5.6         2.4
## 138  virginica          5.5         1.8
## 139  virginica          4.8         1.8
## 140  virginica          5.4         2.1
## 141  virginica          5.6         2.4
## 142  virginica          5.1         2.3
## 143  virginica          5.1         1.9
## 144  virginica          5.9         2.3
## 145  virginica          5.7         2.5
## 146  virginica          5.2         2.3
## 147  virginica          5.0         1.9
## 148  virginica          5.2         2.0
## 149  virginica          5.4         2.3
## 150  virginica          5.1         1.8
set.seed(1)
index <- sample(nrow(iris), 3)
index
## [1]  68 129  43
iris[index,]
##     Sepal.Length Sepal.Width Petal.Length Petal.Width    Species
## 68           5.8         2.7          4.1         1.0 versicolor
## 129          6.4         2.8          5.6         2.1  virginica
## 43           4.4         3.2          1.3         0.2     setosa

duplicated(), which(), unique()

duplicated(c(1, 2, 3, 1, 1, 4, 3))
## [1] FALSE FALSE FALSE  TRUE  TRUE FALSE  TRUE
# Build a small product table, then append a row that repeats product A001 so
# the duplicate-detection examples have something to find.
id <- c("A001", "A002", "A003")
name <- c("Mouse", "Keyboard", "USB")
price <- c(30000, 90000, 50000)
product <- data.frame(id = id, name = name, price = price)
product
##     id     name price
## 1 A001    Mouse 30000
## 2 A002 Keyboard 90000
## 3 A003      USB 50000
# Append the repeated row as a one-row data frame. Passing the atomic vector
# c("A001", "Mouse", 30000) instead would coerce the whole row to character
# and silently turn the price column into character as well.
product <- rbind(product,
                 data.frame(id = "A001", name = "Mouse", price = 30000))
product
##     id     name price
## 1 A001    Mouse 30000
## 2 A002 Keyboard 90000
## 3 A003      USB 50000
## 4 A001    Mouse 30000
duplicated(product)
## [1] FALSE FALSE FALSE  TRUE
product[!duplicated(product), ]
##     id     name price
## 1 A001    Mouse 30000
## 2 A002 Keyboard 90000
## 3 A003      USB 50000
which(duplicated(product))
## [1] 4
# Row positions that repeat an earlier row.
index <- which(duplicated(product))
# Keep every row whose position is not in `index`. Negative indexing
# (product[-index, ]) would return zero rows when `index` is empty, i.e. when
# the table has no duplicates at all; the %in% mask keeps all rows in that case.
product[!(seq_len(nrow(product)) %in% index), ]
##     id     name price
## 1 A001    Mouse 30000
## 2 A002 Keyboard 90000
## 3 A003      USB 50000
unique(product)
##     id     name price
## 1 A001    Mouse 30000
## 2 A002 Keyboard 90000
## 3 A003      USB 50000

complete.cases(), na.omit(), cut()

str(airquality)
## 'data.frame':    153 obs. of  6 variables:
##  $ Ozone  : int  41 36 12 18 NA 28 23 19 8 NA ...
##  $ Solar.R: int  190 118 149 313 NA NA 299 99 19 194 ...
##  $ Wind   : num  7.4 8 12.6 11.5 14.3 14.9 8.6 13.8 20.1 8.6 ...
##  $ Temp   : int  67 72 74 62 56 66 65 59 61 69 ...
##  $ Month  : int  5 5 5 5 5 5 5 5 5 5 ...
##  $ Day    : int  1 2 3 4 5 6 7 8 9 10 ...
complete.cases(airquality)
##   [1]  TRUE  TRUE  TRUE  TRUE FALSE FALSE  TRUE  TRUE  TRUE FALSE FALSE  TRUE
##  [13]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [25] FALSE FALSE FALSE  TRUE  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE FALSE
##  [37] FALSE  TRUE FALSE  TRUE  TRUE FALSE FALSE  TRUE FALSE FALSE  TRUE  TRUE
##  [49]  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [61] FALSE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE
##  [73]  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE
##  [85]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE
##  [97] FALSE FALSE  TRUE  TRUE  TRUE FALSE FALSE  TRUE  TRUE  TRUE FALSE  TRUE
## [109]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE FALSE  TRUE
## [121]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [133]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [145]  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE
airquality.nona <- airquality[complete.cases(airquality),]
str(airquality.nona)
## 'data.frame':    111 obs. of  6 variables:
##  $ Ozone  : int  41 36 12 18 23 19 8 16 11 14 ...
##  $ Solar.R: int  190 118 149 313 299 99 19 256 290 274 ...
##  $ Wind   : num  7.4 8 12.6 11.5 8.6 13.8 20.1 9.7 9.2 10.9 ...
##  $ Temp   : int  67 72 74 62 65 59 61 69 66 68 ...
##  $ Month  : int  5 5 5 5 5 5 5 5 5 5 ...
##  $ Day    : int  1 2 3 4 7 8 9 12 13 14 ...
airquality.nona <- na.omit(airquality)
str(airquality.nona)
## 'data.frame':    111 obs. of  6 variables:
##  $ Ozone  : int  41 36 12 18 23 19 8 16 11 14 ...
##  $ Solar.R: int  190 118 149 313 299 99 19 256 290 274 ...
##  $ Wind   : num  7.4 8 12.6 11.5 8.6 13.8 20.1 9.7 9.2 10.9 ...
##  $ Temp   : int  67 72 74 62 65 59 61 69 66 68 ...
##  $ Month  : int  5 5 5 5 5 5 5 5 5 5 ...
##  $ Day    : int  1 2 3 4 7 8 9 12 13 14 ...
##  - attr(*, "na.action")= 'omit' Named int [1:42] 5 6 10 11 25 26 27 32 33 34 ...
##   ..- attr(*, "names")= chr [1:42] "5" "6" "10" "11" ...
cut(x = iris$Sepal.Width, breaks = c(0, 1, 2, 3 ,4, 5))
##   [1] (3,4] (2,3] (3,4] (3,4] (3,4] (3,4] (3,4] (3,4] (2,3] (3,4] (3,4] (3,4]
##  [13] (2,3] (2,3] (3,4] (4,5] (3,4] (3,4] (3,4] (3,4] (3,4] (3,4] (3,4] (3,4]
##  [25] (3,4] (2,3] (3,4] (3,4] (3,4] (3,4] (3,4] (3,4] (4,5] (4,5] (3,4] (3,4]
##  [37] (3,4] (3,4] (2,3] (3,4] (3,4] (2,3] (3,4] (3,4] (3,4] (2,3] (3,4] (3,4]
##  [49] (3,4] (3,4] (3,4] (3,4] (3,4] (2,3] (2,3] (2,3] (3,4] (2,3] (2,3] (2,3]
##  [61] (1,2] (2,3] (2,3] (2,3] (2,3] (3,4] (2,3] (2,3] (2,3] (2,3] (3,4] (2,3]
##  [73] (2,3] (2,3] (2,3] (2,3] (2,3] (2,3] (2,3] (2,3] (2,3] (2,3] (2,3] (2,3]
##  [85] (2,3] (3,4] (3,4] (2,3] (2,3] (2,3] (2,3] (2,3] (2,3] (2,3] (2,3] (2,3]
##  [97] (2,3] (2,3] (2,3] (2,3] (3,4] (2,3] (2,3] (2,3] (2,3] (2,3] (2,3] (2,3]
## [109] (2,3] (3,4] (3,4] (2,3] (2,3] (2,3] (2,3] (3,4] (2,3] (3,4] (2,3] (2,3]
## [121] (3,4] (2,3] (2,3] (2,3] (3,4] (3,4] (2,3] (2,3] (2,3] (2,3] (2,3] (3,4]
## [133] (2,3] (2,3] (2,3] (2,3] (3,4] (3,4] (2,3] (3,4] (3,4] (3,4] (2,3] (3,4]
## [145] (3,4] (2,3] (2,3] (2,3] (3,4] (2,3]
## Levels: (0,1] (1,2] (2,3] (3,4] (4,5]
cut(x = iris$Sepal.Width, breaks = 5)
##   [1] (3.44,3.92] (2.96,3.44] (2.96,3.44] (2.96,3.44] (3.44,3.92] (3.44,3.92]
##   [7] (2.96,3.44] (2.96,3.44] (2.48,2.96] (2.96,3.44] (3.44,3.92] (2.96,3.44]
##  [13] (2.96,3.44] (2.96,3.44] (3.92,4.4]  (3.92,4.4]  (3.44,3.92] (3.44,3.92]
##  [19] (3.44,3.92] (3.44,3.92] (2.96,3.44] (3.44,3.92] (3.44,3.92] (2.96,3.44]
##  [25] (2.96,3.44] (2.96,3.44] (2.96,3.44] (3.44,3.92] (2.96,3.44] (2.96,3.44]
##  [31] (2.96,3.44] (2.96,3.44] (3.92,4.4]  (3.92,4.4]  (2.96,3.44] (2.96,3.44]
##  [37] (3.44,3.92] (3.44,3.92] (2.96,3.44] (2.96,3.44] (3.44,3.92] (2,2.48]   
##  [43] (2.96,3.44] (3.44,3.92] (3.44,3.92] (2.96,3.44] (3.44,3.92] (2.96,3.44]
##  [49] (3.44,3.92] (2.96,3.44] (2.96,3.44] (2.96,3.44] (2.96,3.44] (2,2.48]   
##  [55] (2.48,2.96] (2.48,2.96] (2.96,3.44] (2,2.48]    (2.48,2.96] (2.48,2.96]
##  [61] (2,2.48]    (2.96,3.44] (2,2.48]    (2.48,2.96] (2.48,2.96] (2.96,3.44]
##  [67] (2.96,3.44] (2.48,2.96] (2,2.48]    (2.48,2.96] (2.96,3.44] (2.48,2.96]
##  [73] (2.48,2.96] (2.48,2.96] (2.48,2.96] (2.96,3.44] (2.48,2.96] (2.96,3.44]
##  [79] (2.48,2.96] (2.48,2.96] (2,2.48]    (2,2.48]    (2.48,2.96] (2.48,2.96]
##  [85] (2.96,3.44] (2.96,3.44] (2.96,3.44] (2,2.48]    (2.96,3.44] (2.48,2.96]
##  [91] (2.48,2.96] (2.96,3.44] (2.48,2.96] (2,2.48]    (2.48,2.96] (2.96,3.44]
##  [97] (2.48,2.96] (2.48,2.96] (2.48,2.96] (2.48,2.96] (2.96,3.44] (2.48,2.96]
## [103] (2.96,3.44] (2.48,2.96] (2.96,3.44] (2.96,3.44] (2.48,2.96] (2.48,2.96]
## [109] (2.48,2.96] (3.44,3.92] (2.96,3.44] (2.48,2.96] (2.96,3.44] (2.48,2.96]
## [115] (2.48,2.96] (2.96,3.44] (2.96,3.44] (3.44,3.92] (2.48,2.96] (2,2.48]   
## [121] (2.96,3.44] (2.48,2.96] (2.48,2.96] (2.48,2.96] (2.96,3.44] (2.96,3.44]
## [127] (2.48,2.96] (2.96,3.44] (2.48,2.96] (2.96,3.44] (2.48,2.96] (3.44,3.92]
## [133] (2.48,2.96] (2.48,2.96] (2.48,2.96] (2.96,3.44] (2.96,3.44] (2.96,3.44]
## [139] (2.96,3.44] (2.96,3.44] (2.96,3.44] (2.96,3.44] (2.48,2.96] (2.96,3.44]
## [145] (2.96,3.44] (2.96,3.44] (2.48,2.96] (2.96,3.44] (2.96,3.44] (2.96,3.44]
## Levels: (2,2.48] (2.48,2.96] (2.96,3.44] (3.44,3.92] (3.92,4.4]
# Bin sepal widths into the unit-wide intervals (0,1], (1,2], ..., (4,5].
iris.cut <- cut(x = iris$Sepal.Width, breaks = 0:5)
table(iris.cut)
## iris.cut
## (0,1] (1,2] (2,3] (3,4] (4,5] 
##     0     1    82    64     3
summary(iris.cut)
## (0,1] (1,2] (2,3] (3,4] (4,5] 
##     0     1    82    64     3
# Same unit-wide bins as before, but each bin gets a descriptive label instead
# of the default "(a,b]" interval notation.
iris.cut <- cut(
  x = iris$Sepal.Width,
  breaks = 0:5,
  labels = c("Smaller", "Small", "Medium", "Big", "Bigger")
)
iris.cut
##   [1] Big    Medium Big    Big    Big    Big    Big    Big    Medium Big   
##  [11] Big    Big    Medium Medium Big    Bigger Big    Big    Big    Big   
##  [21] Big    Big    Big    Big    Big    Medium Big    Big    Big    Big   
##  [31] Big    Big    Bigger Bigger Big    Big    Big    Big    Medium Big   
##  [41] Big    Medium Big    Big    Big    Medium Big    Big    Big    Big   
##  [51] Big    Big    Big    Medium Medium Medium Big    Medium Medium Medium
##  [61] Small  Medium Medium Medium Medium Big    Medium Medium Medium Medium
##  [71] Big    Medium Medium Medium Medium Medium Medium Medium Medium Medium
##  [81] Medium Medium Medium Medium Medium Big    Big    Medium Medium Medium
##  [91] Medium Medium Medium Medium Medium Medium Medium Medium Medium Medium
## [101] Big    Medium Medium Medium Medium Medium Medium Medium Medium Big   
## [111] Big    Medium Medium Medium Medium Big    Medium Big    Medium Medium
## [121] Big    Medium Medium Medium Big    Big    Medium Medium Medium Medium
## [131] Medium Big    Medium Medium Medium Medium Big    Big    Medium Big   
## [141] Big    Big    Medium Big    Big    Medium Medium Medium Big    Medium
## Levels: Smaller Small Medium Big Bigger
table(iris.cut)
## iris.cut
## Smaller   Small  Medium     Big  Bigger 
##       0       1      82      64       3

반복 적용

apply()

2D 행렬에서

x <- matrix(1:20, 4, 5)
x
##      [,1] [,2] [,3] [,4] [,5]
## [1,]    1    5    9   13   17
## [2,]    2    6   10   14   18
## [3,]    3    7   11   15   19
## [4,]    4    8   12   16   20
apply(X = x, MARGIN = 1, FUN = max)
## [1] 17 18 19 20
apply(X = x, MARGIN = 2, FUN = max)
## [1]  4  8 12 16 20

3D 배열에서

y <- array(1:24, c(4, 3, 2))
y
## , , 1
## 
##      [,1] [,2] [,3]
## [1,]    1    5    9
## [2,]    2    6   10
## [3,]    3    7   11
## [4,]    4    8   12
## 
## , , 2
## 
##      [,1] [,2] [,3]
## [1,]   13   17   21
## [2,]   14   18   22
## [3,]   15   19   23
## [4,]   16   20   24
apply(y, 1, paste, collapse = ",")
## [1] "1,5,9,13,17,21"  "2,6,10,14,18,22" "3,7,11,15,19,23" "4,8,12,16,20,24"
apply(y, 2, paste, collapse = ",")
## [1] "1,2,3,4,13,14,15,16"    "5,6,7,8,17,18,19,20"    "9,10,11,12,21,22,23,24"
apply(y, 3, paste, collapse = ",")
## [1] "1,2,3,4,5,6,7,8,9,10,11,12"          "13,14,15,16,17,18,19,20,21,22,23,24"
apply(y, c(1, 2), paste, collapse=",")
##      [,1]   [,2]   [,3]   
## [1,] "1,13" "5,17" "9,21" 
## [2,] "2,14" "6,18" "10,22"
## [3,] "3,15" "7,19" "11,23"
## [4,] "4,16" "8,20" "12,24"

4D 배열에서

Titanic
## , , Age = Child, Survived = No
## 
##       Sex
## Class  Male Female
##   1st     0      0
##   2nd     0      0
##   3rd    35     17
##   Crew    0      0
## 
## , , Age = Adult, Survived = No
## 
##       Sex
## Class  Male Female
##   1st   118      4
##   2nd   154     13
##   3rd   387     89
##   Crew  670      3
## 
## , , Age = Child, Survived = Yes
## 
##       Sex
## Class  Male Female
##   1st     5      1
##   2nd    11     13
##   3rd    13     14
##   Crew    0      0
## 
## , , Age = Adult, Survived = Yes
## 
##       Sex
## Class  Male Female
##   1st    57    140
##   2nd    14     80
##   3rd    75     76
##   Crew  192     20
str(Titanic)
##  'table' num [1:4, 1:2, 1:2, 1:2] 0 0 35 0 0 0 17 0 118 154 ...
##  - attr(*, "dimnames")=List of 4
##   ..$ Class   : chr [1:4] "1st" "2nd" "3rd" "Crew"
##   ..$ Sex     : chr [1:2] "Male" "Female"
##   ..$ Age     : chr [1:2] "Child" "Adult"
##   ..$ Survived: chr [1:2] "No" "Yes"
apply(Titanic, 1, sum)
##  1st  2nd  3rd Crew 
##  325  285  706  885
apply(Titanic, 4, sum)
##   No  Yes 
## 1490  711
apply(Titanic, "Class", sum)
##  1st  2nd  3rd Crew 
##  325  285  706  885
apply(Titanic, c(1, 4), sum)
##       Survived
## Class   No Yes
##   1st  122 203
##   2nd  167 118
##   3rd  528 178
##   Crew 673 212

lapply(), sapply(), mapply()

리스트에서

# Four named numeric vectors of unequal length, held in a list so that the
# lapply()/sapply() examples can process them one element at a time.
exams <- list(
  s20 = c(78, 89, 91, 85, 85, 87),
  s21 = c(85, 86, 97, 99, 90),
  s22 = c(98, 96, 89, 90, 93, 85, 92),
  s23 = c(98, 96, 91, 88, 93, 99)
)
exams
## $s20
## [1] 78 89 91 85 85 87
## 
## $s21
## [1] 85 86 97 99 90
## 
## $s22
## [1] 98 96 89 90 93 85 92
## 
## $s23
## [1] 98 96 91 88 93 99
lapply(exams, length)
## $s20
## [1] 6
## 
## $s21
## [1] 5
## 
## $s22
## [1] 7
## 
## $s23
## [1] 6
sapply(exams, length)
## s20 s21 s22 s23 
##   6   5   7   6
sapply(exams, mean)
##      s20      s21      s22      s23 
## 85.83333 91.40000 91.85714 94.16667
sapply(exams, sd)
##      s20      s21      s22      s23 
## 4.490731 6.348228 4.375255 4.262237
sapply(exams, range)
##      s20 s21 s22 s23
## [1,]  78  85  85  88
## [2,]  91  99  98  99

데이터프레임에서

head(iris)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa
lapply(iris, class)
## $Sepal.Length
## [1] "numeric"
## 
## $Sepal.Width
## [1] "numeric"
## 
## $Petal.Length
## [1] "numeric"
## 
## $Petal.Width
## [1] "numeric"
## 
## $Species
## [1] "factor"
sapply(iris, class)
## Sepal.Length  Sepal.Width Petal.Length  Petal.Width      Species 
##    "numeric"    "numeric"    "numeric"    "numeric"     "factor"
sapply(iris, mean)
## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA
## Sepal.Length  Sepal.Width Petal.Length  Petal.Width      Species 
##     5.843333     3.057333     3.758000     1.199333           NA
# Mean of every numeric column; non-numeric columns (the Species factor) give
# NA without triggering the warning shown above. is.numeric(x) is a single
# TRUE/FALSE per column, so a plain if/else is used rather than the vectorised
# ifelse().
sapply(iris, function(x) if (is.numeric(x)) mean(x) else NA)
## Sepal.Length  Sepal.Width Petal.Length  Petal.Width      Species 
##     5.843333     3.057333     3.758000     1.199333           NA
mapply(rep, 1:4, 4:1)
## [[1]]
## [1] 1 1 1 1
## 
## [[2]]
## [1] 2 2 2
## 
## [[3]]
## [1] 3 3
## 
## [[4]]
## [1] 4

집단 요약

split(), sapply()

data(mtcars)
head(mtcars)
##                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1
# Recode the 0/1 transmission flag as a labelled factor, replacing the am
# column in place (column order is unchanged).
mtcars$am <- factor(mtcars$am,
                    levels = c(0, 1),
                    labels = c("Automatic", "Manual"))
head(mtcars)
##                    mpg cyl disp  hp drat    wt  qsec vs        am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0    Manual    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0    Manual    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1    Manual    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1 Automatic    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0 Automatic    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1 Automatic    3    1
# Split the mpg values into one vector per level of am.
g <- with(mtcars, split(mpg, am))
g
## $Automatic
##  [1] 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2 10.4 10.4 14.7 21.5
## [16] 15.5 15.2 13.3 19.2
## 
## $Manual
##  [1] 21.0 21.0 22.8 32.4 30.4 33.9 27.3 26.0 30.4 15.8 19.7 15.0 21.4
mean(g[[1]])
## [1] 17.14737
mean(g[["Manual"]])
## [1] 24.39231
sapply(g, mean)
## Automatic    Manual 
##  17.14737  24.39231
unstack(data.frame(mtcars$mpg, mtcars$am))
## $Automatic
##  [1] 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2 10.4 10.4 14.7 21.5
## [16] 15.5 15.2 13.3 19.2
## 
## $Manual
##  [1] 21.0 21.0 22.8 32.4 30.4 33.9 27.3 26.0 30.4 15.8 19.7 15.0 21.4

unstack()

head(iris)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa
# Spread Sepal.Length into one column per species. The groups have equal
# sizes, so unstack() returns a data frame rather than a list.
gg <- unstack(iris, Sepal.Length ~ Species)
head(gg)
##   setosa versicolor virginica
## 1    5.1        7.0       6.3
## 2    4.9        6.4       5.8
## 3    4.7        6.9       7.1
## 4    4.6        5.5       6.3
## 5    5.0        6.5       6.5
## 6    5.4        5.7       7.6
str(gg)
## 'data.frame':    50 obs. of  3 variables:
##  $ setosa    : num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##  $ versicolor: num  7 6.4 6.9 5.5 6.5 5.7 6.3 4.9 6.6 5.2 ...
##  $ virginica : num  6.3 5.8 7.1 6.3 6.5 7.6 4.9 7.3 6.7 7.2 ...
summary(gg)
##      setosa        versicolor      virginica    
##  Min.   :4.300   Min.   :4.900   Min.   :4.900  
##  1st Qu.:4.800   1st Qu.:5.600   1st Qu.:6.225  
##  Median :5.000   Median :5.900   Median :6.500  
##  Mean   :5.006   Mean   :5.936   Mean   :6.588  
##  3rd Qu.:5.200   3rd Qu.:6.300   3rd Qu.:6.900  
##  Max.   :5.800   Max.   :7.000   Max.   :7.900

tapply()

head(iris)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa
tapply(X = iris$Sepal.Length, INDEX = iris$Species, FUN=mean)
##     setosa versicolor  virginica 
##      5.006      5.936      6.588
tapply(X = iris$Sepal.Length, INDEX = iris$Species, FUN=length)
##     setosa versicolor  virginica 
##         50         50         50
head(mtcars)
##                    mpg cyl disp  hp drat    wt  qsec vs        am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0    Manual    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0    Manual    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1    Manual    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1 Automatic    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0 Automatic    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1 Automatic    3    1
with(mtcars, tapply(mpg, list(cyl, am), mean))
##   Automatic   Manual
## 4    22.900 28.07500
## 6    19.125 20.56667
## 8    15.050 15.40000
# Mean mpg for every cylinder-count x transmission combination.
# Naming the elements of the INDEX list labels the two dimensions of the
# resulting matrix (rows = Cylinder, columns = Transmission).
with(mtcars, tapply(mpg,
                    list(
                      Cylinder = cyl,
                      Transmission = am),
                    mean)
     )
##         Transmission
## Cylinder Automatic   Manual
##        4    22.900 28.07500
##        6    19.125 20.56667
##        8    15.050 15.40000

aggregate()

head(mtcars)
##                    mpg cyl disp  hp drat    wt  qsec vs        am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0    Manual    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0    Manual    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1    Manual    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1 Automatic    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0 Automatic    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1 Automatic    3    1
with(mtcars, aggregate(x = mpg, by = list(cyl, am), FUN = mean))
##   Group.1   Group.2        x
## 1       4 Automatic 22.90000
## 2       6 Automatic 19.12500
## 3       8 Automatic 15.05000
## 4       4    Manual 28.07500
## 5       6    Manual 20.56667
## 6       8    Manual 15.40000
aggregate(mtcars[c(1:6)],
          list(Group.cyl = mtcars$cyl, Group.am = mtcars$am),
          mean)
##   Group.cyl  Group.am      mpg cyl     disp        hp     drat       wt
## 1         4 Automatic 22.90000   4 135.8667  84.66667 3.770000 2.935000
## 2         6 Automatic 19.12500   6 204.5500 115.25000 3.420000 3.388750
## 3         8 Automatic 15.05000   8 357.6167 194.16667 3.120833 4.104083
## 4         4    Manual 28.07500   4  93.6125  81.87500 4.183750 2.042250
## 5         6    Manual 20.56667   6 155.0000 131.66667 3.806667 2.755000
## 6         8    Manual 15.40000   8 326.0000 299.50000 3.880000 3.370000
aggregate(iris[1:4], list(Species = iris$Species), mean)
##      Species Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1     setosa        5.006       3.428        1.462       0.246
## 2 versicolor        5.936       2.770        4.260       1.326
## 3  virginica        6.588       2.974        5.552       2.026

by()

by(data = iris, INDICES = iris$Species, FUN = summary)
## iris$Species: setosa
##   Sepal.Length    Sepal.Width     Petal.Length    Petal.Width   
##  Min.   :4.300   Min.   :2.300   Min.   :1.000   Min.   :0.100  
##  1st Qu.:4.800   1st Qu.:3.200   1st Qu.:1.400   1st Qu.:0.200  
##  Median :5.000   Median :3.400   Median :1.500   Median :0.200  
##  Mean   :5.006   Mean   :3.428   Mean   :1.462   Mean   :0.246  
##  3rd Qu.:5.200   3rd Qu.:3.675   3rd Qu.:1.575   3rd Qu.:0.300  
##  Max.   :5.800   Max.   :4.400   Max.   :1.900   Max.   :0.600  
##        Species  
##  setosa    :50  
##  versicolor: 0  
##  virginica : 0  
##                 
##                 
##                 
## ------------------------------------------------------------ 
## iris$Species: versicolor
##   Sepal.Length    Sepal.Width     Petal.Length   Petal.Width          Species  
##  Min.   :4.900   Min.   :2.000   Min.   :3.00   Min.   :1.000   setosa    : 0  
##  1st Qu.:5.600   1st Qu.:2.525   1st Qu.:4.00   1st Qu.:1.200   versicolor:50  
##  Median :5.900   Median :2.800   Median :4.35   Median :1.300   virginica : 0  
##  Mean   :5.936   Mean   :2.770   Mean   :4.26   Mean   :1.326                  
##  3rd Qu.:6.300   3rd Qu.:3.000   3rd Qu.:4.60   3rd Qu.:1.500                  
##  Max.   :7.000   Max.   :3.400   Max.   :5.10   Max.   :1.800                  
## ------------------------------------------------------------ 
## iris$Species: virginica
##   Sepal.Length    Sepal.Width     Petal.Length    Petal.Width   
##  Min.   :4.900   Min.   :2.200   Min.   :4.500   Min.   :1.400  
##  1st Qu.:6.225   1st Qu.:2.800   1st Qu.:5.100   1st Qu.:1.800  
##  Median :6.500   Median :3.000   Median :5.550   Median :2.000  
##  Mean   :6.588   Mean   :2.974   Mean   :5.552   Mean   :2.026  
##  3rd Qu.:6.900   3rd Qu.:3.175   3rd Qu.:5.875   3rd Qu.:2.300  
##  Max.   :7.900   Max.   :3.800   Max.   :6.900   Max.   :2.500  
##        Species  
##  setosa    : 0  
##  versicolor: 0  
##  virginica :50  
##                 
##                 
## 

table()

table(mtcars$gear)
## 
##  3  4  5 
## 15 12  5
table(mtcars$am)
## 
## Automatic    Manual 
##        19        13
table(mtcars$am, mtcars$gear)
##            
##              3  4  5
##   Automatic 15  4  0
##   Manual     0  8  5

cut()

mpg.cut <- cut(mtcars$mpg, breaks = 5)
table(mpg.cut)
## mpg.cut
## (10.4,15.1] (15.1,19.8] (19.8,24.5] (24.5,29.2] (29.2,33.9] 
##           6          12           8           2           4

분할, 적용, 결합

# install.packages("dplyr")
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
head(airquality)
##   Ozone Solar.R Wind Temp Month Day
## 1    41     190  7.4   67     5   1
## 2    36     118  8.0   72     5   2
## 3    12     149 12.6   74     5   3
## 4    18     313 11.5   62     5   4
## 5    NA      NA 14.3   56     5   5
## 6    28      NA 14.9   66     5   6
str(airquality)
## 'data.frame':    153 obs. of  6 variables:
##  $ Ozone  : int  41 36 12 18 NA 28 23 19 8 NA ...
##  $ Solar.R: int  190 118 149 313 NA NA 299 99 19 194 ...
##  $ Wind   : num  7.4 8 12.6 11.5 14.3 14.9 8.6 13.8 20.1 8.6 ...
##  $ Temp   : int  67 72 74 62 56 66 65 59 61 69 ...
##  $ Month  : int  5 5 5 5 5 5 5 5 5 5 ...
##  $ Day    : int  1 2 3 4 5 6 7 8 9 10 ...

filter()

filter(airquality, Month == 6)
##    Ozone Solar.R Wind Temp Month Day
## 1     NA     286  8.6   78     6   1
## 2     NA     287  9.7   74     6   2
## 3     NA     242 16.1   67     6   3
## 4     NA     186  9.2   84     6   4
## 5     NA     220  8.6   85     6   5
## 6     NA     264 14.3   79     6   6
## 7     29     127  9.7   82     6   7
## 8     NA     273  6.9   87     6   8
## 9     71     291 13.8   90     6   9
## 10    39     323 11.5   87     6  10
## 11    NA     259 10.9   93     6  11
## 12    NA     250  9.2   92     6  12
## 13    23     148  8.0   82     6  13
## 14    NA     332 13.8   80     6  14
## 15    NA     322 11.5   79     6  15
## 16    21     191 14.9   77     6  16
## 17    37     284 20.7   72     6  17
## 18    20      37  9.2   65     6  18
## 19    12     120 11.5   73     6  19
## 20    13     137 10.3   76     6  20
## 21    NA     150  6.3   77     6  21
## 22    NA      59  1.7   76     6  22
## 23    NA      91  4.6   76     6  23
## 24    NA     250  6.3   76     6  24
## 25    NA     135  8.0   75     6  25
## 26    NA     127  8.0   78     6  26
## 27    NA      47 10.3   73     6  27
## 28    NA      98 11.5   80     6  28
## 29    NA      31 14.9   77     6  29
## 30    NA     138  8.0   83     6  30
airquality[airquality$Month == 6,]
##    Ozone Solar.R Wind Temp Month Day
## 32    NA     286  8.6   78     6   1
## 33    NA     287  9.7   74     6   2
## 34    NA     242 16.1   67     6   3
## 35    NA     186  9.2   84     6   4
## 36    NA     220  8.6   85     6   5
## 37    NA     264 14.3   79     6   6
## 38    29     127  9.7   82     6   7
## 39    NA     273  6.9   87     6   8
## 40    71     291 13.8   90     6   9
## 41    39     323 11.5   87     6  10
## 42    NA     259 10.9   93     6  11
## 43    NA     250  9.2   92     6  12
## 44    23     148  8.0   82     6  13
## 45    NA     332 13.8   80     6  14
## 46    NA     322 11.5   79     6  15
## 47    21     191 14.9   77     6  16
## 48    37     284 20.7   72     6  17
## 49    20      37  9.2   65     6  18
## 50    12     120 11.5   73     6  19
## 51    13     137 10.3   76     6  20
## 52    NA     150  6.3   77     6  21
## 53    NA      59  1.7   76     6  22
## 54    NA      91  4.6   76     6  23
## 55    NA     250  6.3   76     6  24
## 56    NA     135  8.0   75     6  25
## 57    NA     127  8.0   78     6  26
## 58    NA      47 10.3   73     6  27
## 59    NA      98 11.5   80     6  28
## 60    NA      31 14.9   77     6  29
## 61    NA     138  8.0   83     6  30
subset(airquality, subset = (Month == 6))
##    Ozone Solar.R Wind Temp Month Day
## 32    NA     286  8.6   78     6   1
## 33    NA     287  9.7   74     6   2
## 34    NA     242 16.1   67     6   3
## 35    NA     186  9.2   84     6   4
## 36    NA     220  8.6   85     6   5
## 37    NA     264 14.3   79     6   6
## 38    29     127  9.7   82     6   7
## 39    NA     273  6.9   87     6   8
## 40    71     291 13.8   90     6   9
## 41    39     323 11.5   87     6  10
## 42    NA     259 10.9   93     6  11
## 43    NA     250  9.2   92     6  12
## 44    23     148  8.0   82     6  13
## 45    NA     332 13.8   80     6  14
## 46    NA     322 11.5   79     6  15
## 47    21     191 14.9   77     6  16
## 48    37     284 20.7   72     6  17
## 49    20      37  9.2   65     6  18
## 50    12     120 11.5   73     6  19
## 51    13     137 10.3   76     6  20
## 52    NA     150  6.3   77     6  21
## 53    NA      59  1.7   76     6  22
## 54    NA      91  4.6   76     6  23
## 55    NA     250  6.3   76     6  24
## 56    NA     135  8.0   75     6  25
## 57    NA     127  8.0   78     6  26
## 58    NA      47 10.3   73     6  27
## 59    NA      98 11.5   80     6  28
## 60    NA      31 14.9   77     6  29
## 61    NA     138  8.0   83     6  30
air <- filter(airquality, Month == 6, Temp > 90)
air
##   Ozone Solar.R Wind Temp Month Day
## 1    NA     259 10.9   93     6  11
## 2    NA     250  9.2   92     6  12
air <- filter(airquality, Month == 6 & Temp > 90)
air
##   Ozone Solar.R Wind Temp Month Day
## 1    NA     259 10.9   93     6  11
## 2    NA     250  9.2   92     6  12
air <- filter(airquality, Ozone > 80 | Temp > 90)
air
##    Ozone Solar.R Wind Temp Month Day
## 1    115     223  5.7   79     5  30
## 2     NA     259 10.9   93     6  11
## 3     NA     250  9.2   92     6  12
## 4    135     269  4.1   84     7   1
## 5     97     267  6.3   92     7   8
## 6     97     272  5.7   92     7   9
## 7     85     175  7.4   89     7  10
## 8     NA     291 14.9   91     7  14
## 9    108     223  8.0   85     7  25
## 10    82     213  7.4   88     7  28
## 11   122     255  4.0   89     8   7
## 12    89     229 10.3   90     8   8
## 13   110     207  8.0   90     8   9
## 14    NA     222  8.6   92     8  10
## 15   168     238  3.4   81     8  25
## 16    76     203  9.7   97     8  28
## 17   118     225  2.3   94     8  29
## 18    84     237  6.3   96     8  30
## 19    85     188  6.3   94     8  31
## 20    96     167  6.9   91     9   1
## 21    78     197  5.1   92     9   2
## 22    73     183  2.8   93     9   3
## 23    91     189  4.6   93     9   4

slice()

slice(airquality, 6:10)
##   Ozone Solar.R Wind Temp Month Day
## 1    28      NA 14.9   66     5   6
## 2    23     299  8.6   65     5   7
## 3    19      99 13.8   59     5   8
## 4     8      19 20.1   61     5   9
## 5    NA     194  8.6   69     5  10
slice(airquality, n())
##   Ozone Solar.R Wind Temp Month Day
## 1    20     223 11.5   68     9  30
slice(airquality, (n()-4):n())
##   Ozone Solar.R Wind Temp Month Day
## 1    30     193  6.9   70     9  26
## 2    NA     145 13.2   77     9  27
## 3    14     191 14.3   75     9  28
## 4    18     131  8.0   76     9  29
## 5    20     223 11.5   68     9  30

arrange()

arrange(airquality, Temp, Month, Day) %>% 
  head(5)
##   Ozone Solar.R Wind Temp Month Day
## 1    NA      NA 14.3   56     5   5
## 2     6      78 18.4   57     5  18
## 3    NA      66 16.6   57     5  25
## 4    NA      NA  8.0   57     5  27
## 5    18      65 13.2   58     5  15
arrange(airquality, desc(Temp), Month, Day) %>% 
  head(5)
##   Ozone Solar.R Wind Temp Month Day
## 1    76     203  9.7   97     8  28
## 2    84     237  6.3   96     8  30
## 3   118     225  2.3   94     8  29
## 4    85     188  6.3   94     8  31
## 5    NA     259 10.9   93     6  11

select(), rename(), distinct()

select(airquality, Month, Day, Temp) %>% 
  head(5)
##   Month Day Temp
## 1     5   1   67
## 2     5   2   72
## 3     5   3   74
## 4     5   4   62
## 5     5   5   56
select(airquality, Temp:Day) %>% 
  head(5)
##   Temp Month Day
## 1   67     5   1
## 2   72     5   2
## 3   74     5   3
## 4   62     5   4
## 5   56     5   5
select(airquality, -(Temp:Day)) %>% 
  head(5)
##   Ozone Solar.R Wind
## 1    41     190  7.4
## 2    36     118  8.0
## 3    12     149 12.6
## 4    18     313 11.5
## 5    NA      NA 14.3
select(airquality, Solar = Solar.R) %>% 
  head(5)
##   Solar
## 1   190
## 2   118
## 3   149
## 4   313
## 5    NA
rename(airquality, Solar = Solar.R) %>% 
  head(5)
##   Ozone Solar Wind Temp Month Day
## 1    41   190  7.4   67     5   1
## 2    36   118  8.0   72     5   2
## 3    12   149 12.6   74     5   3
## 4    18   313 11.5   62     5   4
## 5    NA    NA 14.3   56     5   5
distinct(select(airquality, Month))
##   Month
## 1     5
## 2     6
## 3     7
## 4     8
## 5     9

mutate()

air <- mutate(airquality,
       Temp.C = (Temp - 32) / 1.8,
       Diff = Temp.C - mean(Temp.C))
head(air)
##   Ozone Solar.R Wind Temp Month Day   Temp.C       Diff
## 1    41     190  7.4   67     5   1 19.44444  -6.045752
## 2    36     118  8.0   72     5   2 22.22222  -3.267974
## 3    12     149 12.6   74     5   3 23.33333  -2.156863
## 4    18     313 11.5   62     5   4 16.66667  -8.823529
## 5    NA      NA 14.3   56     5   5 13.33333 -12.156863
## 6    28      NA 14.9   66     5   6 18.88889  -6.601307
# Base-R counterpart of the mutate() call above, shown for contrast.
# transform() evaluates all of its arguments against the columns that already
# exist in the data frame, so the Temp.C created on the first line is not
# visible when Diff is computed. Unless an object named Temp.C happens to exist
# in the calling environment, this call stops with "object 'Temp.C' not found",
# which is why no output is shown for it. mutate() evaluates its arguments
# sequentially and therefore can refer to a column created earlier in the same
# call.
transform(airquality,
          Temp.C = (Temp - 32) / 1.8,
          Diff = Temp.C - mean(Temp.C))
air <- transform(airquality,
                 Temp.C = (Temp - 32) / 1.8)
head(air)
##   Ozone Solar.R Wind Temp Month Day   Temp.C
## 1    41     190  7.4   67     5   1 19.44444
## 2    36     118  8.0   72     5   2 22.22222
## 3    12     149 12.6   74     5   3 23.33333
## 4    18     313 11.5   62     5   4 16.66667
## 5    NA      NA 14.3   56     5   5 13.33333
## 6    28      NA 14.9   66     5   6 18.88889

summarise()

summarise(airquality,
          mean(Temp),
          median(Temp, na.rm = TRUE),
          sd(Temp, na.rm = TRUE),
          max(Temp, na.rm = TRUE),
          min(Temp, na.rm = TRUE))
##   mean(Temp) median(Temp, na.rm = TRUE) sd(Temp, na.rm = TRUE)
## 1   77.88235                         79                9.46527
##   max(Temp, na.rm = TRUE) min(Temp, na.rm = TRUE)
## 1                      97                      56
summarise(airquality,
          Mean = mean(Temp),
          Median = median(Temp, na.rm = TRUE),
          SD = sd(Temp, na.rm = TRUE),
          Max = max(Temp, na.rm = TRUE),
          Min = min(Temp, na.rm = TRUE),
          N = n(),
          Distinct.Month = n_distinct(Month),
          Distinct.First = first(Month),
          Distinct.Last = last(Month))
##       Mean Median      SD Max Min   N Distinct.Month Distinct.First
## 1 77.88235     79 9.46527  97  56 153              5              5
##   Distinct.Last
## 1             9
# Draw 5 rows at random (without replacement). No seed is set, so the rows
# printed below will differ from run to run.
sample_n(airquality, 5)
##   Ozone Solar.R Wind Temp Month Day
## 1    14     274 10.9   68     5  14
## 2    13     137 10.3   76     6  20
## 3    80     294  8.6   86     7  24
## 4     1       8  9.7   59     5  21
## 5    65     157  9.7   80     8  14
# Draw 5% of the 153 rows at random (8 rows after rounding). replace = TRUE
# samples with replacement, so the same row may appear more than once. No seed
# is set, so the rows printed below will differ from run to run.
sample_frac(airquality, 0.05, replace = TRUE)
##   Ozone Solar.R Wind Temp Month Day
## 1    27     175 14.9   81     7  13
## 2    23     299  8.6   65     5   7
## 3    10     264 14.3   73     7  12
## 4    61     285  6.3   84     7  18
## 5    NA     264 14.3   79     6   6
## 6    28     273 11.5   82     8  13
## 7    23     115  7.4   76     8  18
## 8    NA     242 16.1   67     6   3

group_by()

air.group <- group_by(airquality, Month)
class(air.group)
## [1] "grouped_df" "tbl_df"     "tbl"        "data.frame"
air.group
## # A tibble: 153 x 6
## # Groups:   Month [5]
##    Ozone Solar.R  Wind  Temp Month   Day
##    <int>   <int> <dbl> <int> <int> <int>
##  1    41     190   7.4    67     5     1
##  2    36     118   8      72     5     2
##  3    12     149  12.6    74     5     3
##  4    18     313  11.5    62     5     4
##  5    NA      NA  14.3    56     5     5
##  6    28      NA  14.9    66     5     6
##  7    23     299   8.6    65     5     7
##  8    19      99  13.8    59     5     8
##  9     8      19  20.1    61     5     9
## 10    NA     194   8.6    69     5    10
## # ... with 143 more rows
summarise(air.group,
          Mean.Temp = mean(Temp, na.rm = TRUE))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 5 x 2
##   Month Mean.Temp
##   <int>     <dbl>
## 1     5      65.5
## 2     6      79.1
## 3     7      83.9
## 4     8      84.0
## 5     9      76.9
summarise(air.group,
          Mean.Temp = mean(Temp, na.rm = TRUE),
          SD.Temp = sd(Temp, na.rm = TRUE),
          Days = n())
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 5 x 4
##   Month Mean.Temp SD.Temp  Days
##   <int>     <dbl>   <dbl> <int>
## 1     5      65.5    6.85    31
## 2     6      79.1    6.60    30
## 3     7      83.9    4.32    31
## 4     8      84.0    6.59    31
## 5     9      76.9    8.36    30

%>%

iris %>% head
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa
head(iris)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa

group_by(), summarise()

a1 <- select(airquality, Ozone, Temp, Month)
a2 <- group_by(a1, Month)
a3 <- summarise(a2,
                Mean.Ozone = mean(Ozone, na.rm = TRUE),
                Mean.Temp = mean(Temp, na.rm = TRUE))
## `summarise()` ungrouping output (override with `.groups` argument)
a4 <- filter(a3, Mean.Ozone > 40 | Mean.Temp > 80)
a4
## # A tibble: 2 x 3
##   Month Mean.Ozone Mean.Temp
##   <int>      <dbl>     <dbl>
## 1     7       59.1      83.9
## 2     8       60.0      84.0
air <- airquality %>% 
  select(Ozone, Temp, Month) %>% 
  group_by(Month) %>% 
  summarise(Mean.Ozone = mean(Ozone, na.rm = TRUE),
            Mean.Temp = mean(Temp, na.rm = TRUE)) %>% 
  filter(Mean.Ozone > 40 | Mean.Temp > 80)
## `summarise()` ungrouping output (override with `.groups` argument)
air
## # A tibble: 2 x 3
##   Month Mean.Ozone Mean.Temp
##   <int>      <dbl>     <dbl>
## 1     7       59.1      83.9
## 2     8       60.0      84.0

형태 변환

library(reshape2)

library(reshape2)
smiths
##      subject time age weight height
## 1 John Smith    1  33     90   1.87
## 2 Mary Smith    1  NA     NA   1.54

melt()

melt(data = smiths)
## Using subject as id variables
##      subject variable value
## 1 John Smith     time  1.00
## 2 Mary Smith     time  1.00
## 3 John Smith      age 33.00
## 4 Mary Smith      age    NA
## 5 John Smith   weight 90.00
## 6 Mary Smith   weight    NA
## 7 John Smith   height  1.87
## 8 Mary Smith   height  1.54
melt(data = smiths,
     id.vars = "subject")
##      subject variable value
## 1 John Smith     time  1.00
## 2 Mary Smith     time  1.00
## 3 John Smith      age 33.00
## 4 Mary Smith      age    NA
## 5 John Smith   weight 90.00
## 6 Mary Smith   weight    NA
## 7 John Smith   height  1.87
## 8 Mary Smith   height  1.54
melt(data = smiths,
     measure.vars = c(2:5))
##      subject variable value
## 1 John Smith     time  1.00
## 2 Mary Smith     time  1.00
## 3 John Smith      age 33.00
## 4 Mary Smith      age    NA
## 5 John Smith   weight 90.00
## 6 Mary Smith   weight    NA
## 7 John Smith   height  1.87
## 8 Mary Smith   height  1.54
melt(data = smiths,
     measure.vars = c("time", "age", "weight", "height"))
##      subject variable value
## 1 John Smith     time  1.00
## 2 Mary Smith     time  1.00
## 3 John Smith      age 33.00
## 4 Mary Smith      age    NA
## 5 John Smith   weight 90.00
## 6 Mary Smith   weight    NA
## 7 John Smith   height  1.87
## 8 Mary Smith   height  1.54

dcast()

smiths.long <- melt(data = smiths,
     id.vars = "subject",
     measure.vars = c("time", "age", "weight", "height"),
     variable.name = "var",
     value.name = "val")
dcast(data = smiths.long, formula = subject ~ var,
      value.var = "val")
##      subject time age weight height
## 1 John Smith    1  33     90   1.87
## 2 Mary Smith    1  NA     NA   1.54
head(airquality)
##   Ozone Solar.R Wind Temp Month Day
## 1    41     190  7.4   67     5   1
## 2    36     118  8.0   72     5   2
## 3    12     149 12.6   74     5   3
## 4    18     313 11.5   62     5   4
## 5    NA      NA 14.3   56     5   5
## 6    28      NA 14.9   66     5   6
aq.long <- melt(airquality,
                id.vars = c("Month", "Day"))
head(aq.long)
##   Month Day variable value
## 1     5   1    Ozone    41
## 2     5   2    Ozone    36
## 3     5   3    Ozone    12
## 4     5   4    Ozone    18
## 5     5   5    Ozone    NA
## 6     5   6    Ozone    28
# Cast the long data back to wide form: one row per (Month, Day) pair and one
# column per level of `variable`, with the cells filled from the `value`
# column. value.var is spelled out so the value column is chosen explicitly
# rather than guessed by dcast().
aq.wide <- dcast(aq.long,
                 Month + Day ~ variable,
                 value.var = "value")
head(aq.wide)
##   Month Day Ozone Solar.R Wind Temp
## 1     5   1    41     190  7.4   67
## 2     5   2    36     118  8.0   72
## 3     5   3    12     149 12.6   74
## 4     5   4    18     313 11.5   62
## 5     5   5    NA      NA 14.3   56
## 6     5   6    28      NA 14.9   66
dcast(aq.long, Month ~ variable)
## Aggregation function missing: defaulting to length
##   Month Ozone Solar.R Wind Temp
## 1     5    31      31   31   31
## 2     6    30      30   30   30
## 3     7    31      31   31   31
## 4     8    31      31   31   31
## 5     9    30      30   30   30
dcast(aq.long, Month ~ variable,
      fun.aggregate = mean, na.rm = TRUE)
##   Month    Ozone  Solar.R      Wind     Temp
## 1     5 23.61538 181.2963 11.622581 65.54839
## 2     6 29.44444 190.1667 10.266667 79.10000
## 3     7 59.11538 216.4839  8.941935 83.90323
## 4     8 59.96154 171.8571  8.793548 83.96774
## 5     9 31.44828 167.4333 10.180000 76.90000

library(tidyr)

gather()

library(tidyr)
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:reshape2':
## 
##     smiths
aq.long <- gather(airquality,
                  key = Factor,
                  value = Measurement,
                  Ozone:Temp)
aq.long <- gather(airquality,
                  key = Factor,
                  value = Measurement,
                  -Month, -Day)
aq.long <- gather(airquality,
                  key = Factor,
                  value = Measurement,
                  1:4)
aq.long <- gather(airquality,
                  key = Factor,
                  value = Measurement,
                  Ozone, Solar.R, Wind, Temp)

spread()

spread(data = aq.long,
       key = Factor,
       value = Measurement)
##     Month Day Ozone Solar.R Temp Wind
## 1       5   1    41     190   67  7.4
## 2       5   2    36     118   72  8.0
## 3       5   3    12     149   74 12.6
## 4       5   4    18     313   62 11.5
## 5       5   5    NA      NA   56 14.3
## 6       5   6    28      NA   66 14.9
## 7       5   7    23     299   65  8.6
## 8       5   8    19      99   59 13.8
## 9       5   9     8      19   61 20.1
## 10      5  10    NA     194   69  8.6
## 11      5  11     7      NA   74  6.9
## 12      5  12    16     256   69  9.7
## 13      5  13    11     290   66  9.2
## 14      5  14    14     274   68 10.9
## 15      5  15    18      65   58 13.2
## 16      5  16    14     334   64 11.5
## 17      5  17    34     307   66 12.0
## 18      5  18     6      78   57 18.4
## 19      5  19    30     322   68 11.5
## 20      5  20    11      44   62  9.7
## 21      5  21     1       8   59  9.7
## 22      5  22    11     320   73 16.6
## 23      5  23     4      25   61  9.7
## 24      5  24    32      92   61 12.0
## 25      5  25    NA      66   57 16.6
## 26      5  26    NA     266   58 14.9
## 27      5  27    NA      NA   57  8.0
## 28      5  28    23      13   67 12.0
## 29      5  29    45     252   81 14.9
## 30      5  30   115     223   79  5.7
## 31      5  31    37     279   76  7.4
## 32      6   1    NA     286   78  8.6
## 33      6   2    NA     287   74  9.7
## 34      6   3    NA     242   67 16.1
## 35      6   4    NA     186   84  9.2
## 36      6   5    NA     220   85  8.6
## 37      6   6    NA     264   79 14.3
## 38      6   7    29     127   82  9.7
## 39      6   8    NA     273   87  6.9
## 40      6   9    71     291   90 13.8
## 41      6  10    39     323   87 11.5
## 42      6  11    NA     259   93 10.9
## 43      6  12    NA     250   92  9.2
## 44      6  13    23     148   82  8.0
## 45      6  14    NA     332   80 13.8
## 46      6  15    NA     322   79 11.5
## 47      6  16    21     191   77 14.9
## 48      6  17    37     284   72 20.7
## 49      6  18    20      37   65  9.2
## 50      6  19    12     120   73 11.5
## 51      6  20    13     137   76 10.3
## 52      6  21    NA     150   77  6.3
## 53      6  22    NA      59   76  1.7
## 54      6  23    NA      91   76  4.6
## 55      6  24    NA     250   76  6.3
## 56      6  25    NA     135   75  8.0
## 57      6  26    NA     127   78  8.0
## 58      6  27    NA      47   73 10.3
## 59      6  28    NA      98   80 11.5
## 60      6  29    NA      31   77 14.9
## 61      6  30    NA     138   83  8.0
## 62      7   1   135     269   84  4.1
## 63      7   2    49     248   85  9.2
## 64      7   3    32     236   81  9.2
## 65      7   4    NA     101   84 10.9
## 66      7   5    64     175   83  4.6
## 67      7   6    40     314   83 10.9
## 68      7   7    77     276   88  5.1
## 69      7   8    97     267   92  6.3
## 70      7   9    97     272   92  5.7
## 71      7  10    85     175   89  7.4
## 72      7  11    NA     139   82  8.6
## 73      7  12    10     264   73 14.3
## 74      7  13    27     175   81 14.9
## 75      7  14    NA     291   91 14.9
## 76      7  15     7      48   80 14.3
## 77      7  16    48     260   81  6.9
## 78      7  17    35     274   82 10.3
## 79      7  18    61     285   84  6.3
## 80      7  19    79     187   87  5.1
## 81      7  20    63     220   85 11.5
## 82      7  21    16       7   74  6.9
## 83      7  22    NA     258   81  9.7
## 84      7  23    NA     295   82 11.5
## 85      7  24    80     294   86  8.6
## 86      7  25   108     223   85  8.0
## 87      7  26    20      81   82  8.6
## 88      7  27    52      82   86 12.0
## 89      7  28    82     213   88  7.4
## 90      7  29    50     275   86  7.4
## 91      7  30    64     253   83  7.4
## 92      7  31    59     254   81  9.2
## 93      8   1    39      83   81  6.9
## 94      8   2     9      24   81 13.8
## 95      8   3    16      77   82  7.4
## 96      8   4    78      NA   86  6.9
## 97      8   5    35      NA   85  7.4
## 98      8   6    66      NA   87  4.6
## 99      8   7   122     255   89  4.0
## 100     8   8    89     229   90 10.3
## 101     8   9   110     207   90  8.0
## 102     8  10    NA     222   92  8.6
## 103     8  11    NA     137   86 11.5
## 104     8  12    44     192   86 11.5
## 105     8  13    28     273   82 11.5
## 106     8  14    65     157   80  9.7
## 107     8  15    NA      64   79 11.5
## 108     8  16    22      71   77 10.3
## 109     8  17    59      51   79  6.3
## 110     8  18    23     115   76  7.4
## 111     8  19    31     244   78 10.9
## 112     8  20    44     190   78 10.3
## 113     8  21    21     259   77 15.5
## 114     8  22     9      36   72 14.3
## 115     8  23    NA     255   75 12.6
## 116     8  24    45     212   79  9.7
## 117     8  25   168     238   81  3.4
## 118     8  26    73     215   86  8.0
## 119     8  27    NA     153   88  5.7
## 120     8  28    76     203   97  9.7
## 121     8  29   118     225   94  2.3
## 122     8  30    84     237   96  6.3
## 123     8  31    85     188   94  6.3
## 124     9   1    96     167   91  6.9
## 125     9   2    78     197   92  5.1
## 126     9   3    73     183   93  2.8
## 127     9   4    91     189   93  4.6
## 128     9   5    47      95   87  7.4
## 129     9   6    32      92   84 15.5
## 130     9   7    20     252   80 10.9
## 131     9   8    23     220   78 10.3
## 132     9   9    21     230   75 10.9
## 133     9  10    24     259   73  9.7
## 134     9  11    44     236   81 14.9
## 135     9  12    21     259   76 15.5
## 136     9  13    28     238   77  6.3
## 137     9  14     9      24   71 10.9
## 138     9  15    13     112   71 11.5
## 139     9  16    46     237   78  6.9
## 140     9  17    18     224   67 13.8
## 141     9  18    13      27   76 10.3
## 142     9  19    24     238   68 10.3
## 143     9  20    16     201   82  8.0
## 144     9  21    13     238   64 12.6
## 145     9  22    23      14   71  9.2
## 146     9  23    36     139   81 10.3
## 147     9  24     7      49   69 10.3
## 148     9  25    14      20   63 16.6
## 149     9  26    30     193   70  6.9
## 150     9  27    NA     145   77 13.2
## 151     9  28    14     191   75 14.3
## 152     9  29    18     131   76  8.0
## 153     9  30    20     223   68 11.5
aq.long %>% 
  spread(key = Factor, value = Measurement)
##     Month Day Ozone Solar.R Temp Wind
## 1       5   1    41     190   67  7.4
## 2       5   2    36     118   72  8.0
## 3       5   3    12     149   74 12.6
## 4       5   4    18     313   62 11.5
## 5       5   5    NA      NA   56 14.3
## 6       5   6    28      NA   66 14.9
## 7       5   7    23     299   65  8.6
## 8       5   8    19      99   59 13.8
## 9       5   9     8      19   61 20.1
## 10      5  10    NA     194   69  8.6
## 11      5  11     7      NA   74  6.9
## 12      5  12    16     256   69  9.7
## 13      5  13    11     290   66  9.2
## 14      5  14    14     274   68 10.9
## 15      5  15    18      65   58 13.2
## 16      5  16    14     334   64 11.5
## 17      5  17    34     307   66 12.0
## 18      5  18     6      78   57 18.4
## 19      5  19    30     322   68 11.5
## 20      5  20    11      44   62  9.7
## 21      5  21     1       8   59  9.7
## 22      5  22    11     320   73 16.6
## 23      5  23     4      25   61  9.7
## 24      5  24    32      92   61 12.0
## 25      5  25    NA      66   57 16.6
## 26      5  26    NA     266   58 14.9
## 27      5  27    NA      NA   57  8.0
## 28      5  28    23      13   67 12.0
## 29      5  29    45     252   81 14.9
## 30      5  30   115     223   79  5.7
## 31      5  31    37     279   76  7.4
## 32      6   1    NA     286   78  8.6
## 33      6   2    NA     287   74  9.7
## 34      6   3    NA     242   67 16.1
## 35      6   4    NA     186   84  9.2
## 36      6   5    NA     220   85  8.6
## 37      6   6    NA     264   79 14.3
## 38      6   7    29     127   82  9.7
## 39      6   8    NA     273   87  6.9
## 40      6   9    71     291   90 13.8
## 41      6  10    39     323   87 11.5
## 42      6  11    NA     259   93 10.9
## 43      6  12    NA     250   92  9.2
## 44      6  13    23     148   82  8.0
## 45      6  14    NA     332   80 13.8
## 46      6  15    NA     322   79 11.5
## 47      6  16    21     191   77 14.9
## 48      6  17    37     284   72 20.7
## 49      6  18    20      37   65  9.2
## 50      6  19    12     120   73 11.5
## 51      6  20    13     137   76 10.3
## 52      6  21    NA     150   77  6.3
## 53      6  22    NA      59   76  1.7
## 54      6  23    NA      91   76  4.6
## 55      6  24    NA     250   76  6.3
## 56      6  25    NA     135   75  8.0
## 57      6  26    NA     127   78  8.0
## 58      6  27    NA      47   73 10.3
## 59      6  28    NA      98   80 11.5
## 60      6  29    NA      31   77 14.9
## 61      6  30    NA     138   83  8.0
## 62      7   1   135     269   84  4.1
## 63      7   2    49     248   85  9.2
## 64      7   3    32     236   81  9.2
## 65      7   4    NA     101   84 10.9
## 66      7   5    64     175   83  4.6
## 67      7   6    40     314   83 10.9
## 68      7   7    77     276   88  5.1
## 69      7   8    97     267   92  6.3
## 70      7   9    97     272   92  5.7
## 71      7  10    85     175   89  7.4
## 72      7  11    NA     139   82  8.6
## 73      7  12    10     264   73 14.3
## 74      7  13    27     175   81 14.9
## 75      7  14    NA     291   91 14.9
## 76      7  15     7      48   80 14.3
## 77      7  16    48     260   81  6.9
## 78      7  17    35     274   82 10.3
## 79      7  18    61     285   84  6.3
## 80      7  19    79     187   87  5.1
## 81      7  20    63     220   85 11.5
## 82      7  21    16       7   74  6.9
## 83      7  22    NA     258   81  9.7
## 84      7  23    NA     295   82 11.5
## 85      7  24    80     294   86  8.6
## 86      7  25   108     223   85  8.0
## 87      7  26    20      81   82  8.6
## 88      7  27    52      82   86 12.0
## 89      7  28    82     213   88  7.4
## 90      7  29    50     275   86  7.4
## 91      7  30    64     253   83  7.4
## 92      7  31    59     254   81  9.2
## 93      8   1    39      83   81  6.9
## 94      8   2     9      24   81 13.8
## 95      8   3    16      77   82  7.4
## 96      8   4    78      NA   86  6.9
## 97      8   5    35      NA   85  7.4
## 98      8   6    66      NA   87  4.6
## 99      8   7   122     255   89  4.0
## 100     8   8    89     229   90 10.3
## 101     8   9   110     207   90  8.0
## 102     8  10    NA     222   92  8.6
## 103     8  11    NA     137   86 11.5
## 104     8  12    44     192   86 11.5
## 105     8  13    28     273   82 11.5
## 106     8  14    65     157   80  9.7
## 107     8  15    NA      64   79 11.5
## 108     8  16    22      71   77 10.3
## 109     8  17    59      51   79  6.3
## 110     8  18    23     115   76  7.4
## 111     8  19    31     244   78 10.9
## 112     8  20    44     190   78 10.3
## 113     8  21    21     259   77 15.5
## 114     8  22     9      36   72 14.3
## 115     8  23    NA     255   75 12.6
## 116     8  24    45     212   79  9.7
## 117     8  25   168     238   81  3.4
## 118     8  26    73     215   86  8.0
## 119     8  27    NA     153   88  5.7
## 120     8  28    76     203   97  9.7
## 121     8  29   118     225   94  2.3
## 122     8  30    84     237   96  6.3
## 123     8  31    85     188   94  6.3
## 124     9   1    96     167   91  6.9
## 125     9   2    78     197   92  5.1
## 126     9   3    73     183   93  2.8
## 127     9   4    91     189   93  4.6
## 128     9   5    47      95   87  7.4
## 129     9   6    32      92   84 15.5
## 130     9   7    20     252   80 10.9
## 131     9   8    23     220   78 10.3
## 132     9   9    21     230   75 10.9
## 133     9  10    24     259   73  9.7
## 134     9  11    44     236   81 14.9
## 135     9  12    21     259   76 15.5
## 136     9  13    28     238   77  6.3
## 137     9  14     9      24   71 10.9
## 138     9  15    13     112   71 11.5
## 139     9  16    46     237   78  6.9
## 140     9  17    18     224   67 13.8
## 141     9  18    13      27   76 10.3
## 142     9  19    24     238   68 10.3
## 143     9  20    16     201   82  8.0
## 144     9  21    13     238   64 12.6
## 145     9  22    23      14   71  9.2
## 146     9  23    36     139   81 10.3
## 147     9  24     7      49   69 10.3
## 148     9  25    14      20   63 16.6
## 149     9  26    30     193   70  6.9
## 150     9  27    NA     145   77 13.2
## 151     9  28    14     191   75 14.3
## 152     9  29    18     131   76  8.0
## 153     9  30    20     223   68 11.5

separate()

# Stack every column except Species into Element/Measurement pairs
# (wide -> long), then show the last rows of the result.
iris.long <- gather(data = iris,
                    key = Element,
                    value = Measurement,
                    -Species)
tail(iris.long)
##       Species     Element Measurement
## 595 virginica Petal.Width         2.5
## 596 virginica Petal.Width         2.3
## 597 virginica Petal.Width         1.9
## 598 virginica Petal.Width         2.0
## 599 virginica Petal.Width         2.3
## 600 virginica Petal.Width         1.8
# Split the dotted Element label (e.g. "Petal.Width") into two columns.
# No `sep` is given, so separate() uses its default separator rule.
iris.sep <- iris.long %>%
  separate(col = Element, into = c("Part", "Measures"))
tail(iris.sep)
##       Species  Part Measures Measurement
## 595 virginica Petal    Width         2.5
## 596 virginica Petal    Width         2.3
## 597 virginica Petal    Width         1.9
## 598 virginica Petal    Width         2.0
## 599 virginica Petal    Width         2.3
## 600 virginica Petal    Width         1.8

unite()

# Paste Part and Measures back together into one Factor column,
# joined with an underscore.
iris.unite <- iris.sep %>%
  unite(col = Factor, Part, Measures, sep = "_")
tail(iris.unite)
##       Species      Factor Measurement
## 595 virginica Petal_Width         2.5
## 596 virginica Petal_Width         2.3
## 597 virginica Petal_Width         1.9
## 598 virginica Petal_Width         2.0
## 599 virginica Petal_Width         2.3
## 600 virginica Petal_Width         1.8

ggplot2

Grammar of Graphics

library(ggplot2)

# Scatter plot of fuel economy against weight for the mtcars data, with
# axis labels, title, subtitle and caption all supplied through labs().
ggplot(data = mtcars, aes(x = wt, y = mpg)) +
  geom_point() +
  labs(x = "weight (1,000 lbs)",
       y = "Fuel Consumption (miles per gallon)",
       title = "Fuel Consumption vs. Weight",
       subtitle = "Negative relationship between fuel efficiency and car weight",
       caption = "Source: mtcars dataset") # the data plotted here is mtcars

# Histogram of mpg with one facet row per value of cyl.
# Neither `bins` nor `binwidth` is set, so stat_bin() uses its default.
ggplot(mtcars, mapping = aes(x = mpg)) +
  geom_histogram() +
  facet_grid(cyl ~ .) +
  labs(
    x = "Miles per Gallon",
    title = "geom_histogram()"
  )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Recode cyl as a labelled factor so the plots below treat it as a discrete
# grouping variable. This overwrites the column in the session's mtcars copy.
mtcars[["cyl"]] <- factor(
  mtcars[["cyl"]],
  levels = c(4, 6, 8),
  labels = c("4 cylinders", "6 cylinders", "8 cylinders")
)

# Box plot: distribution of mpg within each cylinder group.
ggplot(mtcars, mapping = aes(x = cyl, y = mpg)) +
  geom_boxplot() +
  labs(
    x = "Number of Cylinders",
    y = "Miles per Gallon",
    title = "geom_boxplot()"
  )

# Density curves of mpg, one filled curve per cylinder group.
ggplot(mtcars, mapping = aes(x = mpg, fill = cyl)) +
  geom_density() +
  labs(
    x = "Miles per Gallon",
    title = "geom_density()"
  )

# Scatter plot of mpg against weight, points coloured by cylinder group.
ggplot(mtcars, mapping = aes(x = wt, y = mpg, col = cyl)) +
  geom_point() +
  labs(
    x = "Weight (1,000 lbs)",
    y = "Miles per Gallon",
    title = "geom_point()"
  )

# Smoothed trend of mpg against weight. No `method` is given, so
# geom_smooth() chooses one itself and reports the choice in a message.
ggplot(data = mtcars, aes(x = wt, y = mpg)) +
  geom_smooth() +
  labs(title = "geom_smooth()",
       x = "Weight (1,000 lbs)", # spacing matches the other weight-axis labels
       y = "Miles per Gallon")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Line chart of the unemploy series over date from the economics data.
ggplot(economics, mapping = aes(x = date, y = unemploy)) +
  geom_line() +
  labs(
    x = "Year",
    y = "Number of Unemployed (thousands)",
    title = "geom_line()"
  )

geom()

# Layered scatter plot of mpg against weight: styled points, a dashed
# linear-regression line, and a text label carrying each car's row name.
ggplot(data = mtcars, aes(x = wt, y = mpg)) +
  geom_point(shape = 21,
             color = "blue",
             bg = "skyblue", # fill colour inside the point
             size = 2,
             stroke = 1) + # thickness of the point outline
  geom_smooth(method = "lm", # fit a linear regression
              color = "red",
              linetype = 2,
              size = 1) +
  geom_text(label = rownames(mtcars),
            hjust = 0,
            vjust = 0,
            nudge_y = 0.7, # lift the labels slightly above their points
            size = 2) +
  labs(x = "weight (1,000 lbs)",
       y = "Fuel Consumption (miles per gallon)",
       title = "Fuel Consumption vs. Weight",
       subtitle = "Negative relationship between fuel efficiency and car weight",
       caption = "Source: mtcars dataset") # the data plotted here is mtcars
## `geom_smooth()` using formula 'y ~ x'

geom_boxplot()

library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
str(Salaries)
## 'data.frame':    397 obs. of  6 variables:
##  $ rank         : Factor w/ 3 levels "AsstProf","AssocProf",..: 3 3 1 3 3 2 3 3 3 3 ...
##  $ discipline   : Factor w/ 2 levels "A","B": 2 2 2 2 2 2 2 2 2 2 ...
##  $ yrs.since.phd: int  19 20 4 45 40 6 30 45 21 18 ...
##  $ yrs.service  : int  18 16 3 39 41 6 23 45 20 18 ...
##  $ sex          : Factor w/ 2 levels "Female","Male": 2 2 2 2 2 2 2 2 2 1 ...
##  $ salary       : int  139750 173200 79750 115000 141500 97000 175000 147765 119250 129000 ...
# Notched box plot of salary by rank, overlaid with the jittered raw
# observations and a rug of salary values on the left side.
ggplot(data = Salaries, mapping = aes(x = rank, y = salary)) +
  geom_boxplot(
    notch = TRUE,
    fill = "salmon",
    color = "dimgray"
  ) +
  geom_point(
    position = "jitter", # spread overlapping points apart
    alpha = 0.5,         # transparency
    color = "royalblue"
  ) +
  geom_rug(color = "dimgray", sides = "l")

geom_violin()

library(lattice)
head(singer)
##   height voice.part
## 1     64  Soprano 1
## 2     62  Soprano 1
## 3     66  Soprano 1
## 4     65  Soprano 1
## 5     60  Soprano 1
## 6     61  Soprano 1
# Violin plot of height per voice part with a narrow box plot drawn on top.
ggplot(data = singer, mapping = aes(x = voice.part, y = height)) +
  geom_violin(fill = "honeydew2") +
  geom_boxplot(width = 0.2, fill = "lightgreen")

geom_density()

library(car)
str(Salaries)
## 'data.frame':    397 obs. of  6 variables:
##  $ rank         : Factor w/ 3 levels "AsstProf","AssocProf",..: 3 3 1 3 3 2 3 3 3 3 ...
##  $ discipline   : Factor w/ 2 levels "A","B": 2 2 2 2 2 2 2 2 2 2 ...
##  $ yrs.since.phd: int  19 20 4 45 40 6 30 45 21 18 ...
##  $ yrs.service  : int  18 16 3 39 41 6 23 45 20 18 ...
##  $ sex          : Factor w/ 2 levels "Female","Male": 2 2 2 2 2 2 2 2 2 1 ...
##  $ salary       : int  139750 173200 79750 115000 141500 97000 175000 147765 119250 129000 ...
# Overlapping, semi-transparent salary densities, one per rank.
ggplot(data = Salaries, mapping = aes(x = salary, fill = rank)) +
  geom_density(alpha = 0.5)

# Salary against years since PhD; colour encodes rank, shape encodes sex.
ggplot(
  data = Salaries,
  mapping = aes(x = yrs.since.phd, y = salary, color = rank, shape = sex)
) +
  geom_point()

# Bar chart of rank filled by sex, drawn once per `position` setting.
# position = "stack"
ggplot(data = Salaries, mapping = aes(x = rank, fill = sex)) +
  geom_bar(position = "stack")

# position = "dodge"
ggplot(data = Salaries, mapping = aes(x = rank, fill = sex)) +
  geom_bar(position = "dodge")

# position = "fill"; the y axis is relabelled to match.
# The three options shown in this section: stack, dodge, fill.
ggplot(data = Salaries, mapping = aes(x = rank, fill = sex)) +
  geom_bar(position = "fill") +
  labs(y = "Proportion")

geom_bar()

# Pre-summarised grade table: one row per grade with its frequency already
# tallied. Printed explicitly so the table is shown right after creation.
presummed <- data.frame(
  Grade = c("A", "B", "C", "D", "F"),
  Frequency = c(20, 40, 20, 10, 5)
)
print(presummed)
##   Grade Frequency
## 1     A        20
## 2     B        40
## 3     C        20
## 4     D        10
## 5     F         5
# Frequency already holds the bar heights, so the bars are drawn from the
# y values as given: stat = "identity".
ggplot(data = presummed, mapping = aes(x = Grade, y = Frequency)) +
  geom_bar(stat = "identity")

# Same data and mapping, drawn with geom_col() instead.
ggplot(data = presummed, mapping = aes(x = Grade, y = Frequency)) +
  geom_col()

facet_wrap(), facet_grid()

library(lattice)
head(singer)
##   height voice.part
## 1     64  Soprano 1
## 2     62  Soprano 1
## 3     66  Soprano 1
## 4     65  Soprano 1
## 5     60  Soprano 1
## 6     61  Soprano 1
# Histogram of height, one panel per voice part, wrapped into four rows.
# Neither `bins` nor `binwidth` is set, so stat_bin() uses its default.
ggplot(data = singer, mapping = aes(x = height)) +
  geom_histogram() +
  facet_wrap(facets = ~ voice.part, nrow = 4)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Height densities filled by voice part, one facet row per voice part.
ggplot(data = singer, mapping = aes(x = height, fill = voice.part)) +
  geom_density() +
  facet_grid(voice.part ~ .)

# Salary against years since PhD in a sex (rows) by rank (columns) grid.
ggplot(data = Salaries, mapping = aes(x = yrs.since.phd, y = salary)) +
  geom_point() +
  facet_grid(sex ~ rank)

# Same scatter with rank on both colour and shape, one facet column per sex.
ggplot(
  data = Salaries,
  mapping = aes(x = yrs.since.phd, y = salary, color = rank, shape = rank)
) +
  geom_point() +
  facet_grid(. ~ sex)

scale_<aesthetic>_<name/data type>

library(car)
str(Salaries)
## 'data.frame':    397 obs. of  6 variables:
##  $ rank         : Factor w/ 3 levels "AsstProf","AssocProf",..: 3 3 1 3 3 2 3 3 3 3 ...
##  $ discipline   : Factor w/ 2 levels "A","B": 2 2 2 2 2 2 2 2 2 2 ...
##  $ yrs.since.phd: int  19 20 4 45 40 6 30 45 21 18 ...
##  $ yrs.service  : int  18 16 3 39 41 6 23 45 20 18 ...
##  $ sex          : Factor w/ 2 levels "Female","Male": 2 2 2 2 2 2 2 2 2 1 ...
##  $ salary       : int  139750 173200 79750 115000 141500 97000 175000 147765 119250 129000 ...
# Salary box plots by rank and sex with custom tick labels on both axes.
# Here the legend title is set through labs(fill = ...).
ggplot(data = Salaries, mapping = aes(x = rank, y = salary, fill = sex)) +
  geom_boxplot() +
  scale_x_discrete(
    breaks = c("AsstProf", "AssocProf", "Prof"),
    labels = c("Assistant\nProfessor", "Associate\nProfessor", "Professor")
  ) +
  scale_y_continuous(
    breaks = c(50000, 100000, 150000, 200000),
    labels = c("$50k", "$100k", "$150k", "$200k")
  ) +
  labs(fill = "Gender")

# Same plot, but the legend title comes from the fill scale and the legend
# is placed with theme(legend.position = ...).
ggplot(data = Salaries, mapping = aes(x = rank, y = salary, fill = sex)) +
  geom_boxplot() +
  scale_x_discrete(
    breaks = c("AsstProf", "AssocProf", "Prof"),
    labels = c("Assistant\nProfessor", "Associate\nProfessor", "Professor")
  ) +
  scale_y_continuous(
    breaks = c(50000, 100000, 150000, 200000),
    labels = c("$50k", "$100k", "$150k", "$200k")
  ) +
  scale_fill_discrete(name = "Gender") + # legend title
  theme(legend.position = c(0.15, 0.75)) # legend position

# Reload the packaged mtcars data set into the session.
data(mtcars)

# Shape and colour both map to factor(cyl); legend titles set with labs().
ggplot(
  data = mtcars,
  mapping = aes(x = wt, y = mpg, shape = factor(cyl), color = factor(cyl))
) +
  geom_point() +
  labs(color = "Cylinder", shape = "Cylinder")

# Same plot; legend titles set through the scale functions instead.
ggplot(
  data = mtcars,
  mapping = aes(x = wt, y = mpg, shape = factor(cyl), color = factor(cyl))
) +
  geom_point() +
  scale_shape_discrete(name = "Cylinder") +
  scale_color_discrete(name = "Cylinder")

# Point size maps to disp; legend title set with labs().
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, size = disp)) +
  geom_point(fill = "wheat", color = "black", shape = 21) +
  labs(size = "Engine\nDisplacement")

# Same plot; legend title set through the size scale instead.
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, size = disp)) +
  geom_point(fill = "wheat", color = "black", shape = 21) +
  scale_size_continuous(name = "Engine\nDisplacement")

# Bar chart of rank filled by sex with hand-picked fill colours.
ggplot(data = Salaries, mapping = aes(x = rank, fill = sex)) +
  geom_bar() +
  scale_fill_manual(values = c("tomato", "cornflowerblue"))

# Scatter coloured by rank with hand-picked colours.
ggplot(
  data = Salaries,
  mapping = aes(x = yrs.since.phd, y = salary, color = rank)
) +
  geom_point(size = 2) +
  scale_color_manual(values = c("orange", "violetred", "steelblue"))

# Same scatter using the "Accent" brewer palette.
ggplot(
  data = Salaries,
  mapping = aes(x = yrs.since.phd, y = salary, color = rank)
) +
  geom_point(size = 2) +
  scale_color_brewer(palette = "Accent")

# Numeric colour mapping (disp) drawn with scale_color_gradient2() defaults.
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, color = disp)) +
  geom_point() +
  scale_color_gradient2()

# Rank on both colour and shape, with the point shapes chosen by number.
ggplot(
  data = Salaries,
  mapping = aes(x = yrs.since.phd, y = salary, color = rank, shape = rank)
) +
  geom_point(size = 2) +
  scale_shape_manual(values = c(15, 17, 19))

theme_<theme>

library(car)
str(Salaries)
## 'data.frame':    397 obs. of  6 variables:
##  $ rank         : Factor w/ 3 levels "AsstProf","AssocProf",..: 3 3 1 3 3 2 3 3 3 3 ...
##  $ discipline   : Factor w/ 2 levels "A","B": 2 2 2 2 2 2 2 2 2 2 ...
##  $ yrs.since.phd: int  19 20 4 45 40 6 30 45 21 18 ...
##  $ yrs.service  : int  18 16 3 39 41 6 23 45 20 18 ...
##  $ sex          : Factor w/ 2 levels "Female","Male": 2 2 2 2 2 2 2 2 2 1 ...
##  $ salary       : int  139750 173200 79750 115000 141500 97000 175000 147765 119250 129000 ...
# Salary against years since PhD, faceted by sex, drawn with a built-in
# complete theme.
ggplot(
  data = Salaries,
  mapping = aes(x = yrs.since.phd, y = salary, color = rank, shape = rank)
) +
  geom_point() +
  facet_grid(. ~ sex) +
  theme_light() # default: theme_gray()

# Salary box plots by rank and sex, with individual theme elements
# (title, axis text, panel, horizontal grid lines, legend) set inline.
ggplot(data = Salaries, mapping = aes(x = rank, y = salary, fill = sex)) +
  geom_boxplot() +
  labs(
    x = "Rank",
    y = "Salary",
    title = "Salary by Rank and Sex"
  ) +
  theme(
    plot.title = element_text(
      size = 14, color = "brown", face = "bold.italic"
    ),
    axis.title = element_text(
      size = 10, color = "tomato", face = "bold.italic"
    ),
    axis.text = element_text(
      size = 9, color = "royalblue", face = "bold"
    ),
    panel.background = element_rect(color = "darkblue", fill = "snow"),
    panel.grid.major.y = element_line(linetype = "solid", color = "gray"),
    panel.grid.minor.y = element_line(linetype = "dashed", color = "gray"),
    legend.position = "top"
  )

# Store a set of theme settings in an object so it can be added to any plot.
mytheme <- theme(
  plot.title = element_text(
    size = 14, color = "brown", face = "bold.italic"
  ),
  axis.title = element_text(
    size = 10, color = "tomato", face = "bold.italic"
  ),
  axis.text = element_text(
    size = 9, color = "royalblue", face = "bold"
  ),
  panel.background = element_rect(color = "darkblue", fill = "snow"),
  panel.grid.major.y = element_line(linetype = "solid", color = "gray"),
  panel.grid.minor.y = element_line(linetype = "dashed", color = "gray"),
  legend.position = "top"
)

library(lattice)
# Apply the stored theme to a different data set: height by voice part.
ggplot(data = singer, mapping = aes(x = voice.part, y = height)) +
  geom_boxplot() +
  labs(
    x = "Voice Part",
    y = "Height",
    title = "Height by voice part"
  ) +
  mytheme

library(gridExtra)

library(ggplot2)
library(car)

# Four separate plots of the Salaries data, stored for later arrangement.
# p1: bar chart of rank.
p1 <- ggplot(data = Salaries, mapping = aes(x = rank)) +
  geom_bar(fill = "steelblue")

# p2: histogram of salary (default binning).
p2 <- ggplot(data = Salaries, mapping = aes(x = salary)) +
  geom_histogram(fill = "maroon")

# p3: scatter of salary against years since PhD.
p3 <- ggplot(data = Salaries, mapping = aes(x = yrs.since.phd, y = salary)) +
  geom_point(color = "orange")

# p4: box plot of salary by rank.
p4 <- ggplot(data = Salaries, mapping = aes(x = rank, y = salary)) +
  geom_boxplot(fill = "mistyrose")

# install.packages("gridExtra")
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
# Lay the four stored plots out on one page as a 2 x 2 grid.
grid.arrange(p1, p2, p3, p4, ncol = 2, nrow = 2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Keep the arranged 2 x 2 grid in an object so it can be saved afterwards.
myggplot <- grid.arrange(p1, p2, p3, p4, ncol = 2, nrow = 2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Write the stored grid to disk at an explicit size. The argument is spelled
# out as `filename`; the shorter `file` only worked via partial matching.
ggsave(filename = "myplot.png",
       plot = myggplot,
       width = 7.0, # inch
       height = 5.5) # inch

# Draw a single box plot of salary by rank.
ggplot(Salaries, aes(x = rank, y = salary)) +
  geom_boxplot()

# No `plot` is passed here, so ggsave() falls back to its default plot and
# default size.
ggsave(filename = "myplot2.png")
## Saving 7 x 5 in image
# Interactive look-ups for further reading on graphics settings and colours.
?par # help page for par()
colors() # prints the colour names known to R
?RColorBrewer # help topic for RColorBrewer (presumably needs the package to be findable; verify)
# Reference chart of R colour names:
# http://www.stat.columbia.edu/~tzheng/files/Rcolor.pdf

질문은 여기

이 노트 관련 문의는 제이스’s 블로그에 댓글로 부탁드립니다.