# (1) 연산자의 활용 ----
# Scalar operands shared by the operator examples below.
# Lines starting with "##" are the printed results recorded from a run.
x <- 3
y <- 5
z <- 10
# Addition
x + y
## [1] 8
# Subtraction
x - y
## [1] -2
# Multiplication
x * y
## [1] 15
# Division
x / y
## [1] 0.6
# Integer quotient of a division
z %/% 3
## [1] 3
# Remainder of a division
z %% 3
## [1] 1
# Powers and square roots; `^` is the documented power operator
# (`**` is only a parser-level alias for it)
z^2
## [1] 100
z^0.5
## [1] 3.162278
sqrt(z)
## [1] 3.162278
# Comparison operators (conditions)
x < y
## [1] TRUE
x > y
## [1] FALSE
x >= 2
## [1] TRUE
x <= 2
## [1] FALSE
x == y
## [1] FALSE
x != y
## [1] TRUE
x > 3
## [1] FALSE
y > 3
## [1] TRUE
# Logical AND (&): TRUE only when both conditions hold
x > 3 & y > 3
## [1] FALSE
# Logical OR (|): TRUE when at least one condition holds
x > 3 | y > 3
## [1] TRUE
# (2) 자료의 생성 및 입력 ----
# Creating vectors and selecting their elements
# ("##" lines are the printed results recorded from a run)
# Consecutive integers with the colon operator
1:6
## [1] 1 2 3 4 5 6
# The same values combined explicitly
c(1, 2, 3, 4, 5, 6)
## [1] 1 2 3 4 5 6
c(1, 3, 5, 7, 9)
## [1] 1 3 5 7 9
a <- c(1, 3, 5, 7, 9)
a
## [1] 1 3 5 7 9
# Individual values and a range can be combined in one call
b <- c(1, 3, 5, 7, 9, 1:10)
b
## [1] 1 3 5 7 9 1 2 3 4 5 6 7 8 9 10
# Evenly spaced sequences with a fixed step.
# Note: the variable is named `c`; the calls to c(...) further down still work,
# as their recorded output shows.
c <- seq(from = 1, to = 3, by = 0.5)
c
## [1] 1.0 1.5 2.0 2.5 3.0
d <- seq(from = 1, to = 3, by = 0.2)
d
## [1] 1.0 1.2 1.4 1.6 1.8 2.0 2.2 2.4 2.6 2.8 3.0
# times = repeats the whole vector; each = repeats every element in turn
e <- rep(1:3, times = 5)
e
## [1] 1 2 3 1 2 3 1 2 3 1 2 3 1 2 3
f <- rep(1:3, each = 5)
f
## [1] 1 1 1 1 1 2 2 2 2 2 3 3 3 3 3
a
## [1] 1 3 5 7 9
# Select by position: a single index, a range, or several indices
a[3]
## [1] 5
a[2:4]
## [1] 3 5 7
a[c(1, 3)]
## [1] 1 5
# A negative index drops that position
a[-3]
## [1] 1 3 7 9
# Select by condition
a[a == 5]
## [1] 5
a[a != 5]
## [1] 1 3 7 9
# Overwrite the first and third elements
a[c(1, 3)] <- 0
a
## [1] 0 3 0 7 9
# Creating matrices and selecting their elements
# ("##" lines are the printed results recorded from a run)
# matrix() fills column by column by default
x <- matrix(1:12, nrow = 3, ncol = 4)
x
## [,1] [,2] [,3] [,4]
## [1,] 1 4 7 10
## [2,] 2 5 8 11
## [3,] 3 6 9 12
# byrow = TRUE fills row by row instead; spelled out as TRUE because the
# shorthand T is an ordinary variable that can be reassigned
y <- matrix(1:12, nrow = 3, ncol = 4, byrow = TRUE)
y
## [,1] [,2] [,3] [,4]
## [1,] 1 2 3 4
## [2,] 5 6 7 8
## [3,] 9 10 11 12
# A single value is recycled to fill the whole matrix
matrix(1, nrow = 3, ncol = 4)
## [,1] [,2] [,3] [,4]
## [1,] 1 1 1 1
## [2,] 1 1 1 1
## [3,] 1 1 1 1
matrix(0, nrow = 3, ncol = 4)
## [,1] [,2] [,3] [,4]
## [1,] 0 0 0 0
## [2,] 0 0 0 0
## [3,] 0 0 0 0
matrix(NaN, nrow = 3, ncol = 4)
## [,1] [,2] [,3] [,4]
## [1,] NaN NaN NaN NaN
## [2,] NaN NaN NaN NaN
## [3,] NaN NaN NaN NaN
# diag(n) builds an n x n identity matrix
z <- diag(5)
z
## [,1] [,2] [,3] [,4] [,5]
## [1,] 1 0 0 0 0
## [2,] 0 1 0 0 0
## [3,] 0 0 1 0 0
## [4,] 0 0 0 1 0
## [5,] 0 0 0 0 1
# diag() applied to a matrix extracts its diagonal
diag(z)
## [1] 1 1 1 1 1
# Select a sub-matrix, a single cell, a whole row, and a whole column
x[1:2, 1:2]
## [,1] [,2]
## [1,] 1 4
## [2,] 2 5
x[2, 3]
## [1] 8
x[1, ]
## [1] 1 4 7 10
x[, 1]
## [1] 1 2 3
# Creating arrays and selecting their elements
# ("##" lines are the printed results recorded from a run)
# A 3 x 4 x 2 array: two 3 x 4 slices holding the values 1 to 24
X <- array(1:24, dim = c(3, 4, 2))
X
## , , 1
##
## [,1] [,2] [,3] [,4]
## [1,] 1 4 7 10
## [2,] 2 5 8 11
## [3,] 3 6 9 12
##
## , , 2
##
## [,1] [,2] [,3] [,4]
## [1,] 13 16 19 22
## [2,] 14 17 20 23
## [3,] 15 18 21 24
# Leaving the first two indices empty selects a whole slice
X[, , 1]
## [,1] [,2] [,3] [,4]
## [1,] 1 4 7 10
## [2,] 2 5 8 11
## [3,] 3 6 9 12
X[, , 2]
## [,1] [,2] [,3] [,4]
## [1,] 13 16 19 22
## [2,] 14 17 20 23
## [3,] 15 18 21 24
# A single element: row 2, column 4 of the second slice
X[2, 4, 2]
## [1] 23
# (3) Dataframe (table)의 생성 및 선택 ----
# Creating a data frame and selecting from it
# ("##" lines are the printed results recorded from a run)
# Three columns of different types: integer, character, logical
df <- data.frame(
  x = 1:3,
  y = c("kim", "lee", "park"),
  z = c(TRUE, TRUE, FALSE)
)
df
## x y z
## 1 1 kim TRUE
## 2 2 lee TRUE
## 3 3 park FALSE
# Three ways to pull out the first column as a vector
df$x
## [1] 1 2 3
df[, 1]
## [1] 1 2 3
df[[1]]
## [1] 1 2 3
# The first row, with all of its columns
df[1, ]
## x y z
## 1 1 kim TRUE
# A single cell: row 3, column 3
df[3, 3]
## [1] FALSE
# Review the objects built earlier before bundling them into one list
# ("##" lines are the printed results recorded from a run)
x
## [,1] [,2] [,3] [,4]
## [1,] 1 4 7 10
## [2,] 2 5 8 11
## [3,] 3 6 9 12
y
## [,1] [,2] [,3] [,4]
## [1,] 1 2 3 4
## [2,] 5 6 7 8
## [3,] 9 10 11 12
z
## [,1] [,2] [,3] [,4] [,5]
## [1,] 1 0 0 0 0
## [2,] 0 1 0 0 0
## [3,] 0 0 1 0 0
## [4,] 0 0 0 1 0
## [5,] 0 0 0 0 1
a
## [1] 0 3 0 7 9
b
## [1] 1 3 5 7 9 1 2 3 4 5 6 7 8 9 10
c
## [1] 1.0 1.5 2.0 2.5 3.0
# A list can hold vectors, matrices and a data frame side by side;
# each element is stored under the name of the object it came from
out <- list(
  a = a, b = b, c = c,
  x = x, y = y, z = z,
  df = df
)
out
## $a
## [1] 0 3 0 7 9
##
## $b
## [1] 1 3 5 7 9 1 2 3 4 5 6 7 8 9 10
##
## $c
## [1] 1.0 1.5 2.0 2.5 3.0
##
## $x
## [,1] [,2] [,3] [,4]
## [1,] 1 4 7 10
## [2,] 2 5 8 11
## [3,] 3 6 9 12
##
## $y
## [,1] [,2] [,3] [,4]
## [1,] 1 2 3 4
## [2,] 5 6 7 8
## [3,] 9 10 11 12
##
## $z
## [,1] [,2] [,3] [,4] [,5]
## [1,] 1 0 0 0 0
## [2,] 0 1 0 0 0
## [3,] 0 0 1 0 0
## [4,] 0 0 0 1 0
## [5,] 0 0 0 0 1
##
## $df
## x y z
## 1 1 kim TRUE
## 2 2 lee TRUE
## 3 3 park FALSE
# Pull one element out by name, then index into it as usual
out$df
## x y z
## 1 1 kim TRUE
## 2 2 lee TRUE
## 3 3 park FALSE
out$df[3, 3]
## [1] FALSE
# Packages for reading data frames from files (openxlsx, readxl, data.table)
# Reading data into a data frame
# ("##" lines are the printed results recorded from a run; the column names
# come from the input file and the timing headers from the session locale)
# install.packages("openxlsx") # dataframe reading
# install.packages("readxl") # tibble dataframe
# install.packages("data.table") # data.table dataframe
library(openxlsx)
library(readxl)
library(data.table)
# Read the same workbook with two different readers
df1 <- read.xlsx("regress.xlsx")
df2 <- read_excel("regress.xlsx")
df1
## 근무만족도 대인관계 자아개념 근무평정 SES점수
## 1 88 34 78 88 88
## 2 98 23 98 78 56
## 3 88 34 78 98 78
## 4 89 23 88 77 78
## 5 89 34 88 89 67
## 6 78 45 87 89 78
## 7 78 34 89 98 77
## 8 67 34 67 67 56
## 9 78 45 56 78 77
## 10 89 34 78 67 65
## 11 88 78 67 56 64
## 12 67 65 34 67 65
## 13 45 56 45 78 34
## 14 43 67 34 89 45
## 15 45 78 45 78 67
## 16 56 76 56 67 57
## 17 45 78 45 78 45
## 18 67 89 34 67 56
## 19 56 78 45 78 56
## 20 56 67 34 67 45
## 21 45 56 23 98 34
## 22 45 34 34 56 45
## 23 56 23 56 67 56
## 24 45 34 45 78 45
## 25 56 23 34 89 34
## 26 45 34 45 78 45
## 27 34 45 34 67 34
## 28 45 34 23 56 45
## 29 56 45 34 67 56
## 30 65 56 56 78 66
df2
## # A tibble: 30 × 5
## 근무만족도 대인관계 자아개념 근무평정 SES점수
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 88 34 78 88 88
## 2 98 23 98 78 56
## 3 88 34 78 98 78
## 4 89 23 88 77 78
## 5 89 34 88 89 67
## 6 78 45 87 89 78
## 7 78 34 89 98 77
## 8 67 34 67 67 56
## 9 78 45 56 78 77
## 10 89 34 78 67 65
## # ℹ 20 more rows
# read.xlsx() gives a plain data.frame, read_excel() a tibble
class(df1)
## [1] "data.frame"
class(df2)
## [1] "tbl_df" "tbl" "data.frame"
# Read one CSV file with three different readers.
# NOTE(review): read.csv2() defaults to ";" as the field separator and "," as
# the decimal mark, so it is unlikely to parse the same file as read.csv()
# correctly; df3, df4 and df5 are never inspected here -- confirm the intent.
df3 <- read.csv("tempbig.csv")
df4 <- read.csv2("tempbig.csv")
df5 <- fread("tempbig.csv")
# Compare the time taken by read.csv() and fread() on the same file
system.time(read.csv("tempbig.csv"))
## 사용자 시스템 elapsed
## 2.10 0.30 3.78
system.time(fread("tempbig.csv"))
## 사용자 시스템 elapsed
## 0.06 0.00 0.03