- vector :같은 데이터 타입을 갖는 1차원 배열
- list :서로 다른 데이터 타입을 갖는 1차원 배열, 중첩 가능
- matrix :같은 데이터 타입을 갖는 2차원 배열
- array :같은 데이터 타입을 갖는 다차원(n차원) 배열 (아래 예제는 3차원)
- factor :목록, 범주형 데이터
- data.frame :서로 다른 데이터 타입을 갖는 컬럼으로 이루어진 2차원 배열
- data.table :data.frame과 동일한 구조를 가지며 속도가 빠르다 (data.table 패키지에서 제공)
# Build one sample object per basic R data structure, then inspect each with
# class(), mode(), str() and the is.*() predicates.
# NOTE: the variable named `c` shadows base::c() as a value only; calls such as
# c(1, 2) still work because R skips non-function bindings when resolving a call.

# vector: one-dimensional, single data type
a <- c(1, 2)
a
## [1] 1 2

# list: one-dimensional, elements may have different types.
# The elements are passed separately: wrapping them in c("king", 100) would
# coerce 100 to the string "100" and hide the mixed types.
b <- list("king", 100)
b
## [[1]]
## [1] "king"
##
## [[2]]
## [1] 100

# matrix: two-dimensional, single data type (a 2 x 1 matrix here)
c <- matrix(c(1, 2))
c
## [,1]
## [1,] 1
## [2,] 2

# array: n-dimensional, single data type (2 x 2 x 3 here)
d <- array(1:12, dim = c(2, 2, 3))
d
## , , 1
##
## [,1] [,2]
## [1,] 1 3
## [2,] 2 4
##
## , , 2
##
## [,1] [,2]
## [1,] 5 7
## [2,] 6 8
##
## , , 3
##
## [,1] [,2]
## [1,] 9 11
## [2,] 10 12

# factor: categorical data; levels are sorted alphabetically
e <- factor(c("male", "female"))
e
## [1] male female
## Levels: female male

# data.frame: two-dimensional, each column may have its own type
f <- data.frame(x = c(1, 2))
f
## x
## 1 1
## 2 2

# class(): the high-level type of each object.
# (R >= 4.0.0 reports "matrix" "array" for a matrix.)
class(a)
## [1] "numeric"
class(b)
## [1] "list"
class(c)
## [1] "matrix"
class(d)
## [1] "array"
class(e)
## [1] "factor"
class(f)
## [1] "data.frame"

# mode(): the storage mode; a factor is stored as numbers, a data.frame as a list.
mode(a)
## [1] "numeric"
mode(b)
## [1] "list"
mode(c)
## [1] "numeric"
mode(d)
## [1] "numeric"
mode(e)
## [1] "numeric"
mode(f)
## [1] "list"

# str(): compact display of the internal structure.
str(a)
## num [1:2] 1 2
str(b)
## List of 2
## $ : chr "king"
## $ : num 100
str(c)
## num [1:2, 1] 1 2
str(d)
## int [1:2, 1:2, 1:3] 1 2 3 4 5 6 7 8 9 10 ...
str(e)
## Factor w/ 2 levels "female","male": 2 1
str(f)
## 'data.frame': 2 obs. of 1 variable:
## $ x: num 1 2

# Type predicates on the vector: c(1, 2) is numeric (double), not integer.
is.numeric(a)
## [1] TRUE
is.character(a)
## [1] FALSE
is.integer(a)
## [1] FALSE

# Structure predicates, one per sample object.
is.factor(e)
## [1] TRUE
is.matrix(c)
## [1] TRUE
is.array(d)
## [1] TRUE
is.data.frame(f)
## [1] TRUE
is.list(b)
## [1] TRUE
- read.csv( ) :csv파일을 데이터 프레임으로 읽어들이는 함수
- getwd( )로 현재 작업 디렉토리 위치 확인
- setwd( )로 작업 디렉토리 위치 지정
# Read the CSV picked in a file dialog into a data frame, taking column names
# from the first row.
# - file.choose() replaces the Windows-only choose.files().
# - TRUE is spelled out because T is an ordinary variable that can be reassigned.
# - stringsAsFactors = TRUE is explicit because R >= 4.0.0 defaults to FALSE;
#   without it the text columns would be read as character, not factor.
emp <- read.csv(file.choose(), header = TRUE, stringsAsFactors = TRUE)
# LAST_NAME and EMAIL come in as factors; factor values are stored as level
# codes, which can cause problems for columns that are really free text.
str(emp)
## 'data.frame': 107 obs. of 11 variables:
## $ EMPLOYEE_ID : int 198 199 200 201 202 203 204 205 206 100 ...
## $ FIRST_NAME : Factor w/ 91 levels "Adam","Alana",..: 21 22 39 59 66 83 31 76 90 80 ...
## $ LAST_NAME : Factor w/ 102 levels "Abel","Ande",..: 69 37 101 41 28 63 5 42 36 50 ...
## $ EMAIL : Factor w/ 107 levels "ABANDA","ABULL",..: 24 20 52 67 77 93 32 89 105 90 ...
## $ PHONE_NUMBER : Factor w/ 107 levels "011.44.1343.329268",..: 98 99 36 40 62 41 44 42 43 37 ...
## $ HIRE_DATE : int 20070621 20080113 20030917 20040217 20050817 20020607 20020607 20020607 20020607 20030617 ...
## $ JOB_ID : Factor w/ 19 levels "AC_ACCOUNT","AC_MGR",..: 17 17 3 10 11 8 12 2 1 4 ...
## $ SALARY : int 2600 2600 4400 13000 6000 6500 10000 12008 8300 29040 ...
## $ COMMISSION_PCT: num NA NA NA NA NA NA NA NA NA NA ...
## $ MANAGER_ID : int 124 124 101 100 201 101 101 101 205 NA ...
## $ DEPARTMENT_ID : int 50 50 10 20 20 40 70 110 110 90 ...
# Read the same kind of CSV but treat the first row as data (header = FALSE).
# file.choose() is portable (choose.files() is Windows-only), and
# stringsAsFactors = TRUE is explicit so the result matches on R >= 4.0.0.
emp <- read.csv(file.choose(), header = FALSE, stringsAsFactors = TRUE)
# Without a header the column-name row becomes the first data row, so every
# column contains at least one string and is read as text (a factor here);
# the columns get the default names V1..V11 and there is one extra observation.
str(emp)
## 'data.frame': 108 obs. of 11 variables:
## $ V1 : Factor w/ 108 levels "100","101","102",..: 108 99 100 101 102 103 104 105 106 107 ...
## $ V2 : Factor w/ 92 levels "Adam","Alana",..: 26 21 22 40 60 67 84 32 77 91 ...
## $ V3 : Factor w/ 103 levels "Abel","Ande",..: 55 70 37 102 41 28 64 5 42 36 ...
## $ V4 : Factor w/ 108 levels "ABANDA","ABULL",..: 28 24 20 53 68 78 94 33 90 106 ...
## $ V5 : Factor w/ 108 levels "011.44.1343.329268",..: 108 98 99 36 40 62 41 44 42 43 ...
## $ V6 : Factor w/ 99 levels "20010113","20020607",..: 99 81 90 10 15 38 2 2 2 2 ...
## $ V7 : Factor w/ 20 levels "AC_ACCOUNT","AC_MGR",..: 10 18 18 3 11 12 8 13 2 1 ...
## $ V8 : Factor w/ 59 levels "10000","10500",..: 59 15 15 32 7 35 39 1 6 52 ...
## $ V9 : Factor w/ 9 levels "","0.1","0.15",..: 9 1 1 1 1 1 1 1 1 1 ...
## $ V10: Factor w/ 20 levels "","100","101",..: 20 12 12 3 2 18 3 3 3 19 ...
## $ V11: Factor w/ 13 levels "","10","100",..: 13 8 8 2 5 5 7 10 4 4 ...
# Read the CSV with a header row and keep text columns as character vectors
# instead of factors. file.choose() is the portable replacement for the
# Windows-only choose.files(); TRUE/FALSE replace the reassignable T/F.
emp <- read.csv(file.choose(), header = TRUE, stringsAsFactors = FALSE)
str(emp)
## 'data.frame': 107 obs. of 11 variables:
## $ EMPLOYEE_ID : int 198 199 200 201 202 203 204 205 206 100 ...
## $ FIRST_NAME : chr "Donald" "Douglas" "Jennifer" "Michael" ...
## $ LAST_NAME : chr "OConnell" "Grant" "Whalen" "Hartstein" ...
## $ EMAIL : chr "DOCONNEL" "DGRANT" "JWHALEN" "MHARTSTE" ...
## $ PHONE_NUMBER : chr "650.507.9833" "650.507.9844" "515.123.4444" "515.123.5555" ...
## $ HIRE_DATE : int 20070621 20080113 20030917 20040217 20050817 20020607 20020607 20020607 20020607 20030617 ...
## $ JOB_ID : chr "SH_CLERK" "SH_CLERK" "AD_ASST" "MK_MAN" ...
## $ SALARY : int 2600 2600 4400 13000 6000 6500 10000 12008 8300 29040 ...
## $ COMMISSION_PCT: num NA NA NA NA NA NA NA NA NA NA ...
## $ MANAGER_ID : int 124 124 101 100 201 101 101 101 205 NA ...
## $ DEPARTMENT_ID : int 50 50 10 20 20 40 70 110 110 90 ...
# Column names, the EMPLOYEE_ID column as a vector, and an element-wise
# comparison of that column against 100 (one logical value per row).
names(emp)
emp[["EMPLOYEE_ID"]]
emp[["EMPLOYEE_ID"]] == 100
## [1] "EMPLOYEE_ID" "FIRST_NAME" "LAST_NAME" "EMAIL"
## [5] "PHONE_NUMBER" "HIRE_DATE" "JOB_ID" "SALARY"
## [9] "COMMISSION_PCT" "MANAGER_ID" "DEPARTMENT_ID"
## [1] 198 199 200 201 202 203 204 205 206 100 101 102 103 104 105 106 107
## [18] 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124
## [35] 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141
## [52] 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158
## [69] 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175
## [86] 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192
## [103] 193 194 195 196 197
## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE
## [12] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [23] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [34] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [45] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [56] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [67] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [78] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [89] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [100] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# Rows where EMPLOYEE_ID is 100: first with every column, then restricted to
# LAST_NAME and SALARY.
emp[emp[["EMPLOYEE_ID"]] == 100, ]
emp[emp[["EMPLOYEE_ID"]] == 100, c("LAST_NAME", "SALARY")]
## EMPLOYEE_ID FIRST_NAME LAST_NAME EMAIL PHONE_NUMBER HIRE_DATE JOB_ID
## 10 100 Steven King SKING 515.123.4567 20030617 AD_PRES
## SALARY COMMISSION_PCT MANAGER_ID DEPARTMENT_ID
## 10 29040 NA NA 90
## LAST_NAME SALARY
## 10 King 29040
- ^ : 문자열의 시작
- $ : 문자열의 끝
- . : 임의의 문자 한 개
- * : 바로 앞 문자(패턴)의 0회 이상 반복 (SQL의 %처럼 쓰려면 .* 를 사용)
# Rows whose LAST_NAME contains "aa", showing only LAST_NAME and SALARY.
emp[grepl("aa", emp$LAST_NAME), c("LAST_NAME", "SALARY")]
## LAST_NAME SALARY
## 12 De Haan 17000
# ignore.case = TRUE : matching is case-insensitive.
# ignore.case = FALSE : matching is case-sensitive.
# Rows whose LAST_NAME contains x, y or z in either case.
emp[grepl("[x-z]", emp$LAST_NAME, ignore.case = TRUE), c("LAST_NAME", "SALARY")]
## LAST_NAME SALARY
## 5 Fay 6000
## 9 Gietz 8300
## 17 Lorentz 4200
## 24 Raphaely 11000
## 35 Nayer 3200
## 37 Landry 2400
## 57 Errazuriz 12000
## 59 Zlotkey 10500
## 67 Sully 9500
## 72 Vishney 10500
## 78 Ozer 11500
## 80 Fox 9600
## 86 Taylor 8600
## 90 Taylor 3200
## 99 Dilly 3600
## 107 Feeney 3000
# nchar() counts characters by default (type = "chars").
nchar("R Developer")
## [1] 11
nchar("R Developer", type = "chars") # number of characters
## [1] 11
nchar("R Developer", type = "bytes") # number of bytes
## [1] 11
# Korean text: 4 characters and, in the output below, 8 bytes (2 bytes per
# syllable). The byte count depends on the encoding; UTF-8 uses 3 bytes per
# Hangul syllable.
nchar("빅데이터")
## [1] 4
nchar("빅데이터", type = "chars")
## [1] 4
nchar("빅데이터", type = "bytes")
## [1] 8
# strsplit("R Developer") on its own is an error: `split` has no default.
# A zero-length split, or empty-string splits, break the text into single characters.
strsplit("R Developer", split = character(0))
## [[1]]
## [1] "R" " " "D" "e" "v" "e" "l" "o" "p" "e" "r"
strsplit("R Developer", split = character(3))
## [[1]]
## [1] "R" " " "D" "e" "v" "e" "l" "o" "p" "e" "r"
# Split on a space.
strsplit("R Developer", split = " ")
## [[1]]
## [1] "R" "Developer"
# Split on a comma; there is none, so the text comes back in one piece.
strsplit("R Developer", split = ",")
## [[1]]
## [1] "R Developer"
# strsplit() always returns a list ...
str(strsplit("R Developer", split = ","))
## List of 1
## $ : chr "R Developer"
# ... which unlist() flattens into a plain vector.
str(unlist(strsplit("R Developer", split = ",")))
## chr "R Developer"
# Split every last name into its single characters (one list element per row).
# emp is expected to come from
# read.csv("emp.csv", header = TRUE, stringsAsFactors = FALSE).
strsplit(emp$LAST_NAME, split = "")
## [[1]]
## [1] "O" "C" "o" "n" "n" "e" "l" "l"
##
## [[2]]
## [1] "G" "r" "a" "n" "t"
##
## [[3]]
## [1] "W" "h" "a" "l" "e" "n"
##
## [[4]]
## [1] "H" "a" "r" "t" "s" "t" "e" "i" "n"
##
## [[5]]
## [1] "F" "a" "y"
##
## [[6]]
## [1] "M" "a" "v" "r" "i" "s"
##
## [[7]]
## [1] "B" "a" "e" "r"
##
## [[8]]
## [1] "H" "i" "g" "g" "i" "n" "s"
##
## [[9]]
## [1] "G" "i" "e" "t" "z"
##
## [[10]]
## [1] "K" "i" "n" "g"
##
## [[11]]
## [1] "K" "o" "c" "h" "h" "a" "r"
##
## [[12]]
## [1] "D" "e" " " "H" "a" "a" "n"
##
## [[13]]
## [1] "H" "u" "n" "o" "l" "d"
##
## [[14]]
## [1] "E" "r" "n" "s" "t"
##
## [[15]]
## [1] "A" "u" "s" "t" "i" "n"
##
## [[16]]
## [1] "P" "a" "t" "a" "b" "a" "l" "l" "a"
##
## [[17]]
## [1] "L" "o" "r" "e" "n" "t" "z"
##
## [[18]]
## [1] "G" "r" "e" "e" "n" "b" "e" "r" "g"
##
## [[19]]
## [1] "F" "a" "v" "i" "e" "t"
##
## [[20]]
## [1] "C" "h" "e" "n"
##
## [[21]]
## [1] "S" "c" "i" "a" "r" "r" "a"
##
## [[22]]
## [1] "U" "r" "m" "a" "n"
##
## [[23]]
## [1] "P" "o" "p" "p"
##
## [[24]]
## [1] "R" "a" "p" "h" "a" "e" "l" "y"
##
## [[25]]
## [1] "K" "h" "o" "o"
##
## [[26]]
## [1] "B" "a" "i" "d" "a"
##
## [[27]]
## [1] "T" "o" "b" "i" "a" "s"
##
## [[28]]
## [1] "H" "i" "m" "u" "r" "o"
##
## [[29]]
## [1] "C" "o" "l" "m" "e" "n" "a" "r" "e" "s"
##
## [[30]]
## [1] "W" "e" "i" "s" "s"
##
## [[31]]
## [1] "F" "r" "i" "p" "p"
##
## [[32]]
## [1] "K" "a" "u" "f" "l" "i" "n" "g"
##
## [[33]]
## [1] "V" "o" "l" "l" "m" "a" "n"
##
## [[34]]
## [1] "M" "o" "u" "r" "g" "o" "s"
##
## [[35]]
## [1] "N" "a" "y" "e" "r"
##
## [[36]]
## [1] "M" "i" "k" "k" "i" "l" "i" "n" "e" "n" "i"
##
## [[37]]
## [1] "L" "a" "n" "d" "r" "y"
##
## [[38]]
## [1] "M" "a" "r" "k" "l" "e"
##
## [[39]]
## [1] "B" "i" "s" "s" "o" "t"
##
## [[40]]
## [1] "A" "t" "k" "i" "n" "s" "o" "n"
##
## [[41]]
## [1] "M" "a" "r" "l" "o" "w"
##
## [[42]]
## [1] "O" "l" "s" "o" "n"
##
## [[43]]
## [1] "M" "a" "l" "l" "i" "n"
##
## [[44]]
## [1] "R" "o" "g" "e" "r" "s"
##
## [[45]]
## [1] "G" "e" "e"
##
## [[46]]
## [1] "P" "h" "i" "l" "t" "a" "n" "k" "e" "r"
##
## [[47]]
## [1] "L" "a" "d" "w" "i" "g"
##
## [[48]]
## [1] "S" "t" "i" "l" "e" "s"
##
## [[49]]
## [1] "S" "e" "o"
##
## [[50]]
## [1] "P" "a" "t" "e" "l"
##
## [[51]]
## [1] "R" "a" "j" "s"
##
## [[52]]
## [1] "D" "a" "v" "i" "e" "s"
##
## [[53]]
## [1] "M" "a" "t" "o" "s"
##
## [[54]]
## [1] "V" "a" "r" "g" "a" "s"
##
## [[55]]
## [1] "R" "u" "s" "s" "e" "l" "l"
##
## [[56]]
## [1] "P" "a" "r" "t" "n" "e" "r" "s"
##
## [[57]]
## [1] "E" "r" "r" "a" "z" "u" "r" "i" "z"
##
## [[58]]
## [1] "C" "a" "m" "b" "r" "a" "u" "l" "t"
##
## [[59]]
## [1] "Z" "l" "o" "t" "k" "e" "y"
##
## [[60]]
## [1] "T" "u" "c" "k" "e" "r"
##
## [[61]]
## [1] "B" "e" "r" "n" "s" "t" "e" "i" "n"
##
## [[62]]
## [1] "H" "a" "l" "l"
##
## [[63]]
## [1] "O" "l" "s" "e" "n"
##
## [[64]]
## [1] "C" "a" "m" "b" "r" "a" "u" "l" "t"
##
## [[65]]
## [1] "T" "u" "v" "a" "u" "l" "t"
##
## [[66]]
## [1] "K" "i" "n" "g"
##
## [[67]]
## [1] "S" "u" "l" "l" "y"
##
## [[68]]
## [1] "M" "c" "E" "w" "e" "n"
##
## [[69]]
## [1] "S" "m" "i" "t" "h"
##
## [[70]]
## [1] "D" "o" "r" "a" "n"
##
## [[71]]
## [1] "S" "e" "w" "a" "l" "l"
##
## [[72]]
## [1] "V" "i" "s" "h" "n" "e" "y"
##
## [[73]]
## [1] "G" "r" "e" "e" "n" "e"
##
## [[74]]
## [1] "M" "a" "r" "v" "i" "n" "s"
##
## [[75]]
## [1] "L" "e" "e"
##
## [[76]]
## [1] "A" "n" "d" "e"
##
## [[77]]
## [1] "B" "a" "n" "d" "a"
##
## [[78]]
## [1] "O" "z" "e" "r"
##
## [[79]]
## [1] "B" "l" "o" "o" "m"
##
## [[80]]
## [1] "F" "o" "x"
##
## [[81]]
## [1] "S" "m" "i" "t" "h"
##
## [[82]]
## [1] "B" "a" "t" "e" "s"
##
## [[83]]
## [1] "K" "u" "m" "a" "r"
##
## [[84]]
## [1] "A" "b" "e" "l"
##
## [[85]]
## [1] "H" "u" "t" "t" "o" "n"
##
## [[86]]
## [1] "T" "a" "y" "l" "o" "r"
##
## [[87]]
## [1] "L" "i" "v" "i" "n" "g" "s" "t" "o" "n"
##
## [[88]]
## [1] "G" "r" "a" "n" "t"
##
## [[89]]
## [1] "J" "o" "h" "n" "s" "o" "n"
##
## [[90]]
## [1] "T" "a" "y" "l" "o" "r"
##
## [[91]]
## [1] "F" "l" "e" "a" "u" "r"
##
## [[92]]
## [1] "S" "u" "l" "l" "i" "v" "a" "n"
##
## [[93]]
## [1] "G" "e" "o" "n" "i"
##
## [[94]]
## [1] "S" "a" "r" "c" "h" "a" "n" "d"
##
## [[95]]
## [1] "B" "u" "l" "l"
##
## [[96]]
## [1] "D" "e" "l" "l" "i" "n" "g" "e" "r"
##
## [[97]]
## [1] "C" "a" "b" "r" "i" "o"
##
## [[98]]
## [1] "C" "h" "u" "n" "g"
##
## [[99]]
## [1] "D" "i" "l" "l" "y"
##
## [[100]]
## [1] "G" "a" "t" "e" "s"
##
## [[101]]
## [1] "P" "e" "r" "k" "i" "n" "s"
##
## [[102]]
## [1] "B" "e" "l" "l"
##
## [[103]]
## [1] "E" "v" "e" "r" "e" "t" "t"
##
## [[104]]
## [1] "M" "c" "C" "a" "i" "n"
##
## [[105]]
## [1] "J" "o" "n" "e" "s"
##
## [[106]]
## [1] "W" "a" "l" "s" "h"
##
## [[107]]
## [1] "F" "e" "e" "n" "e" "y"
# Case conversion.
toupper("r developer")
## [1] "R DEVELOPER"
tolower("R DEVELOPER")
## [1] "r developer"
# substr(x, start, stop): the characters from position `start` through `stop`.
substr("R Developer", start = 1, stop = 1)
## [1] "R"
# Separators count as characters too: position 2 is "," and position 5 is "3".
substr("1,2,3,4,5,6,7,8,9", start = 1, stop = 1)
## [1] "1"
substr("1,2,3,4,5,6,7,8,9", start = 1, stop = 2)
## [1] "1,"
substr("1,2,3,4,5,6,7,8,9", start = 5, stop = 5)
## [1] "3"
- sub('찾을 패턴', '바꿀 문자열', '원본 문자열') :처음 일치하는 한 곳만 바꾼다 (gsub는 일치하는 모든 곳을 바꾼다)
- sub('a', 'b', 'ac') >> 결과 :bc
# sub() replaces only the first match of the pattern ...
sub(pattern = "R", replacement = "Python", x = "R programmer R Developer")
## [1] "Python programmer R Developer"
# ... while gsub() replaces every match.
gsub(pattern = "R", replacement = "Python", x = "R programmer R Developer")
## [1] "Python programmer Python Developer"
# The pattern is a regular expression: every digit from 0 to 2 becomes "*".
gsub(pattern = "[0-2]", replacement = "*", x = "120304")
## [1] "***3*4"
# round(x, digits): round to `digits` decimal places; a negative value rounds
# to the left of the decimal point.
round(49.926)
## [1] 50
round(49.326, digits = 3)
## [1] 49.326
round(49.326, digits = -5)
## [1] 0
# trunc() drops the fractional part; the second argument does not change the
# result in any of the calls below.
trunc(49.926)
## [1] 49
trunc(49.926, 2)
## [1] 49
trunc(49.926, -4)
## [1] 49
# signif(x, digits): keep `digits` significant digits, counted from the
# leading digit.
signif(49.326, digits = 1)
## [1] 50
signif(49.326, digits = 2)
## [1] 49
# floor(): largest integer not greater than x.
floor(45.926)
## [1] 45
# Current date, current date-time, and the current date-time as a plain string.
Sys.Date()
## [1] "2018-07-25"
Sys.time()
## [1] "2018-07-25 17:19:18 KST"
date()
## [1] "Wed Jul 25 17:19:18 2018"
# as.Date() understands "-" and "/" separated dates without a format;
# as.Date("20180725") is an error, so that layout needs an explicit format.
as.Date("2018-07-25")
## [1] "2018-07-25"
as.Date("2018/07/25")
## [1] "2018-07-25"
as.Date("20180725", format = "%Y%m%d")
## [1] "2018-07-25"
# %Y : year with century (4 digits)
# %y : year without century (2 digits)
# %m : month as a number
# %B : month name
# %d : day of the month
# %A : weekday name
# %u : weekday as a number (1-7, Monday to Sunday)
# %w : weekday as a number (0-6, Sunday to Saturday)
# %H : hour
# %M : minute
# %S : second
# Literal text in the format (here the Korean year/month/day suffixes) is
# matched as-is.
as.Date("2018년 1월 2일", format = "%Y년%m월%d일")
## [1] "2018-01-02"
# Format the current time as yymmdd plus the weekday name (printed in Korean
# in the output below).
format(Sys.time(), "%y%m%d %A")
## [1] "180725 수요일"