# (1) 연산자의 활용

# Scalars used by the operator examples below.
x <- 3
y <- 5
z <- 10


# Addition
x + y
## [1] 8
# Subtraction
x - y
## [1] -2
# Multiplication
x * y
## [1] 15
# Division
x / y
## [1] 0.6
# Integer quotient of a division
z %/% 3
## [1] 3
# Remainder of a division
z %% 3
## [1] 1
# Powers and square roots.
# `^` is the documented exponent operator; `**` is only a parser alias for it.
z^2
## [1] 100
z^0.5
## [1] 3.162278
sqrt(z)
## [1] 3.162278
# Comparison operators (each one yields a logical value)
x < y
## [1] TRUE
x > y
## [1] FALSE
x >= 2
## [1] TRUE
x <= 2
## [1] FALSE
x == y
## [1] FALSE
x != y
## [1] TRUE
x > 3
## [1] FALSE
y > 3
## [1] TRUE
# Logical AND (&): TRUE only when both conditions are TRUE
x > 3 & y > 3
## [1] FALSE
# Logical OR (|): TRUE when at least one condition is TRUE
x > 3 | y > 3
## [1] TRUE

# (2) 자료의 생성 및 입력

# Creating vectors and selecting their elements

# Three ways to write out a vector literal
1:6
## [1] 1 2 3 4 5 6
c(1, 2, 3, 4, 5, 6)
## [1] 1 2 3 4 5 6
c(1, 3, 5, 7, 9)
## [1] 1 3 5 7 9
a <- c(1, 3, 5, 7, 9)
a
## [1] 1 3 5 7 9
# Individual values and a range can be combined in one call
b <- c(1, 3, 5, 7, 9, 1:10)
b
##  [1]  1  3  5  7  9  1  2  3  4  5  6  7  8  9 10
# Evenly spaced sequences.
# NOTE(review): the variable `c` shares its name with the base function c();
# calls such as c(1, 3) still resolve to the function, but the name is easy to misread.
c <- seq(from = 1, to = 3, by = 0.5)
c
## [1] 1.0 1.5 2.0 2.5 3.0
d <- seq(from = 1, to = 3, by = 0.2)
d
##  [1] 1.0 1.2 1.4 1.6 1.8 2.0 2.2 2.4 2.6 2.8 3.0
# `times` repeats the whole vector; `each` repeats every element in turn
e <- rep(1:3, times = 5)
e
##  [1] 1 2 3 1 2 3 1 2 3 1 2 3 1 2 3
f <- rep(1:3, each = 5)
f
##  [1] 1 1 1 1 1 2 2 2 2 2 3 3 3 3 3
# Selecting elements by position, by exclusion, and by condition
a
## [1] 1 3 5 7 9
a[3]
## [1] 5
a[2:4]
## [1] 3 5 7
a[c(1, 3)]
## [1] 1 5
a[-3]
## [1] 1 3 7 9
a[a == 5]
## [1] 5
a[a != 5]
## [1] 1 3 7 9
# Overwrite the first and third elements in place
a[c(1, 3)] <- 0
a
## [1] 0 3 0 7 9
# Creating matrices and selecting their elements

# Values fill the matrix column by column by default
x <- matrix(1:12, nrow = 3, ncol = 4)
x
##      [,1] [,2] [,3] [,4]
## [1,]    1    4    7   10
## [2,]    2    5    8   11
## [3,]    3    6    9   12
# Fill row by row instead. TRUE is spelled out because `T` is an ordinary
# variable that can be reassigned, whereas TRUE is a reserved word.
y <- matrix(1:12, nrow = 3, ncol = 4, byrow = TRUE)
y
##      [,1] [,2] [,3] [,4]
## [1,]    1    2    3    4
## [2,]    5    6    7    8
## [3,]    9   10   11   12
# A single value is recycled to fill the whole matrix
matrix(1, nrow = 3, ncol = 4)
##      [,1] [,2] [,3] [,4]
## [1,]    1    1    1    1
## [2,]    1    1    1    1
## [3,]    1    1    1    1
matrix(0, nrow = 3, ncol = 4)
##      [,1] [,2] [,3] [,4]
## [1,]    0    0    0    0
## [2,]    0    0    0    0
## [3,]    0    0    0    0
matrix(NaN, nrow = 3, ncol = 4)
##      [,1] [,2] [,3] [,4]
## [1,]  NaN  NaN  NaN  NaN
## [2,]  NaN  NaN  NaN  NaN
## [3,]  NaN  NaN  NaN  NaN
# diag(n) builds an n x n identity matrix; diag(<matrix>) extracts its diagonal
z <- diag(5)
z
##      [,1] [,2] [,3] [,4] [,5]
## [1,]    1    0    0    0    0
## [2,]    0    1    0    0    0
## [3,]    0    0    1    0    0
## [4,]    0    0    0    1    0
## [5,]    0    0    0    0    1
diag(z)
## [1] 1 1 1 1 1
# Selecting a sub-matrix, a single cell, a whole row, and a whole column
x[1:2, 1:2]
##      [,1] [,2]
## [1,]    1    4
## [2,]    2    5
x[2, 3]
## [1] 8
x[1, ]
## [1]  1  4  7 10
x[, 1]
## [1] 1 2 3
# Creating arrays and selecting their elements
# 24 values arranged as two 3 x 4 slices
X <- array(1:24, dim = c(3, 4, 2))
X
## , , 1
## 
##      [,1] [,2] [,3] [,4]
## [1,]    1    4    7   10
## [2,]    2    5    8   11
## [3,]    3    6    9   12
## 
## , , 2
## 
##      [,1] [,2] [,3] [,4]
## [1,]   13   16   19   22
## [2,]   14   17   20   23
## [3,]   15   18   21   24
# Leaving the first two indices empty selects an entire slice
X[, , 1]
##      [,1] [,2] [,3] [,4]
## [1,]    1    4    7   10
## [2,]    2    5    8   11
## [3,]    3    6    9   12
X[, , 2]
##      [,1] [,2] [,3] [,4]
## [1,]   13   16   19   22
## [2,]   14   17   20   23
## [3,]   15   18   21   24
# A single cell: row 2, column 4 of the second slice
X[2, 4, 2]
## [1] 23

# (3) Dataframe (table)의 생성 및 선택

# Creating data frames and selecting from them

# Three columns of different types: integer, character, logical
df <- data.frame(
  x = 1:3,
  y = c("kim", "lee", "park"),
  z = c(TRUE, TRUE, FALSE)
)
df
##   x    y     z
## 1 1  kim  TRUE
## 2 2  lee  TRUE
## 3 3 park FALSE
# Three equivalent ways to pull out the first column as a vector
df$x
## [1] 1 2 3
df[, 1]
## [1] 1 2 3
df[[1]]
## [1] 1 2 3
# A whole row, then a single cell
df[1, ]
##   x   y    z
## 1 1 kim TRUE
df[3, 3]
## [1] FALSE
# Objects created in the earlier sections, shown again before bundling them
x
##      [,1] [,2] [,3] [,4]
## [1,]    1    4    7   10
## [2,]    2    5    8   11
## [3,]    3    6    9   12
y
##      [,1] [,2] [,3] [,4]
## [1,]    1    2    3    4
## [2,]    5    6    7    8
## [3,]    9   10   11   12
z
##      [,1] [,2] [,3] [,4] [,5]
## [1,]    1    0    0    0    0
## [2,]    0    1    0    0    0
## [3,]    0    0    1    0    0
## [4,]    0    0    0    1    0
## [5,]    0    0    0    0    1
a
## [1] 0 3 0 7 9
b
##  [1]  1  3  5  7  9  1  2  3  4  5  6  7  8  9 10
c
## [1] 1.0 1.5 2.0 2.5 3.0
# A list can hold vectors, matrices and a data frame side by side
out <- list(
  a = a,
  b = b,
  c = c,
  x = x,
  y = y,
  z = z,
  df = df
)
out
## $a
## [1] 0 3 0 7 9
## 
## $b
##  [1]  1  3  5  7  9  1  2  3  4  5  6  7  8  9 10
## 
## $c
## [1] 1.0 1.5 2.0 2.5 3.0
## 
## $x
##      [,1] [,2] [,3] [,4]
## [1,]    1    4    7   10
## [2,]    2    5    8   11
## [3,]    3    6    9   12
## 
## $y
##      [,1] [,2] [,3] [,4]
## [1,]    1    2    3    4
## [2,]    5    6    7    8
## [3,]    9   10   11   12
## 
## $z
##      [,1] [,2] [,3] [,4] [,5]
## [1,]    1    0    0    0    0
## [2,]    0    1    0    0    0
## [3,]    0    0    1    0    0
## [4,]    0    0    0    1    0
## [5,]    0    0    0    0    1
## 
## $df
##   x    y     z
## 1 1  kim  TRUE
## 2 2  lee  TRUE
## 3 3 park FALSE
# List elements are reached with `$`, and can then be indexed as usual
out$df
##   x    y     z
## 1 1  kim  TRUE
## 2 2  lee  TRUE
## 3 3 park FALSE
out$df[3, 3]
## [1] FALSE
# Packages for reading data frames from files (openxlsx, readxl, data.table)
# Loading data into data frames
# install.packages("openxlsx")   # dataframe reading
# install.packages("readxl")     # tibble dataframe
# install.packages("data.table") # data.table dataframe
library(openxlsx)
library(readxl)
library(data.table)

# Read the same workbook with two different readers
df1 <- read.xlsx("regress.xlsx")
df2 <- read_excel("regress.xlsx")

df1
##    근무만족도 대인관계 자아개념 근무평정 SES점수
## 1          88       34       78       88      88
## 2          98       23       98       78      56
## 3          88       34       78       98      78
## 4          89       23       88       77      78
## 5          89       34       88       89      67
## 6          78       45       87       89      78
## 7          78       34       89       98      77
## 8          67       34       67       67      56
## 9          78       45       56       78      77
## 10         89       34       78       67      65
## 11         88       78       67       56      64
## 12         67       65       34       67      65
## 13         45       56       45       78      34
## 14         43       67       34       89      45
## 15         45       78       45       78      67
## 16         56       76       56       67      57
## 17         45       78       45       78      45
## 18         67       89       34       67      56
## 19         56       78       45       78      56
## 20         56       67       34       67      45
## 21         45       56       23       98      34
## 22         45       34       34       56      45
## 23         56       23       56       67      56
## 24         45       34       45       78      45
## 25         56       23       34       89      34
## 26         45       34       45       78      45
## 27         34       45       34       67      34
## 28         45       34       23       56      45
## 29         56       45       34       67      56
## 30         65       56       56       78      66
df2
## # A tibble: 30 × 5
##    근무만족도 대인관계 자아개념 근무평정 SES점수
##         <dbl>    <dbl>    <dbl>    <dbl>   <dbl>
##  1         88       34       78       88      88
##  2         98       23       98       78      56
##  3         88       34       78       98      78
##  4         89       23       88       77      78
##  5         89       34       88       89      67
##  6         78       45       87       89      78
##  7         78       34       89       98      77
##  8         67       34       67       67      56
##  9         78       45       56       78      77
## 10         89       34       78       67      65
## # ℹ 20 more rows
# read.xlsx() returns a plain data.frame; read_excel() returns a tibble
class(df1)
## [1] "data.frame"
class(df2)
## [1] "tbl_df"     "tbl"        "data.frame"
# Read the same CSV file with three different readers.
# NOTE(review): read.csv2() defaults to sep = ";" and dec = ","; if tempbig.csv is
# comma-separated, df4 will not be split into the same columns as df3 -- confirm.
df3 <- read.csv("tempbig.csv")
df4 <- read.csv2("tempbig.csv")
df5 <- fread("tempbig.csv")

# Time base read.csv() against data.table's fread() on the same file
system.time(read.csv("tempbig.csv"))
##  사용자  시스템 elapsed 
##    2.10    0.30    3.78
system.time(fread("tempbig.csv"))
##  사용자  시스템 elapsed 
##    0.06    0.00    0.03