# Named numeric vectors: attach the names while building each vector, then
# pick elements by position.
V <- setNames(c(1214, 1110, 819), c("양혜정", "김동균", "허준석"))
V[2:3] # second and third elements
## 김동균 허준석
## 1110 819
L <- setNames(c(314, 130, 222), c("강승호", "권민호", "길민주"))
L[-2] # everything except the second element
## 강승호 길민주
## 314 222
행렬은 차원을 가진 벡터이다.
행렬은 열을 순서대로 채워나간다.
# A matrix is a vector with dimensions; the values fill it column by column.
M <- matrix(
  c("허준석", "김동균", "양혜정", "이예은",
    "김대영", "김상현", "강승호", "조준오"),
  nrow = 2, ncol = 4
)
M
## [,1] [,2] [,3] [,4]
## [1,] "허준석" "양혜정" "김대영" "강승호"
## [2,] "김동균" "이예은" "김상현" "조준오"
# Append a third row; it is labelled with the variable name `Mr`.
Mr <- c("권민호", "길민주", "임규빈", "추혁")
M <- rbind(M, Mr)
M
## [,1] [,2] [,3] [,4]
## "허준석" "양혜정" "김대영" "강승호"
## "김동균" "이예은" "김상현" "조준오"
## Mr "권민호" "길민주" "임규빈" "추혁"
# Append a fifth column; it is labelled with the variable name `Mc`.
Mc <- c("정해인", "이현준", "오창민")
M <- cbind(M, Mc)
M
## Mc
## "허준석" "양혜정" "김대영" "강승호" "정해인"
## "김동균" "이예은" "김상현" "조준오" "이현준"
## Mr "권민호" "길민주" "임규빈" "추혁" "오창민"
데이터프레임은 차원을 가진 리스트이다.
데이터프레임은 엑셀의 시트(Sheet)와 같은 역할을 한다.
# A data frame holds equal-length columns side by side. Build the columns
# first, then bind them under explicit column names.
# Note: the result is stored in a variable called `class`, which shares its
# name with the base function class().
name <- c("이동헌", "오동희", "김지원", "유성일", "양혜정")
birth <- c(819, 201, 626, 626, 1214)
male <- c(rep(TRUE, 4), FALSE)
class <- data.frame(name = name, birth = birth, male = male)
class
## name birth male
## 1 이동헌 819 TRUE
## 2 오동희 201 TRUE
## 3 김지원 626 TRUE
## 4 유성일 626 TRUE
## 5 양혜정 1214 FALSE
데이터 재정렬을 위한 패키지
밀집화(aggregation)는 기존 데이터가 가진 많은 정보들을 손실하게 되는 반면,
재정렬(reshape)은 원래 데이터가 가지고 있는 모든 정보들을 그대로 유지한다.
library(reshape)
search()
## [1] ".GlobalEnv" "package:reshape" "package:stats"
## [4] "package:graphics" "package:grDevices" "package:utils"
## [7] "package:datasets" "package:methods" "Autoloads"
## [10] "package:base"
air quality data : 153일 동안의 공기 질(air quality) 측정데이터.
Ozone : 평균 오존량
Solar.R : 태양 복사
Wind : 평균 풍속
Temp : 최대 온도
Month : 월
Day : 일
# Load the built-in airquality data set and preview its first six rows.
data("airquality")
head(airquality, n = 6)
## Ozone Solar.R Wind Temp Month Day
## 1 41 190 7.4 67 5 1
## 2 36 118 8.0 72 5 2
## 3 12 149 12.6 74 5 3
## 4 18 313 11.5 62 5 4
## 5 NA NA 14.3 56 5 5
## 6 28 NA 14.9 66 5 6
# Total number of missing cells across the whole table.
sum(is.na(airquality))
## [1] 44
colnames(airquality)
## [1] "Ozone" "Solar.R" "Wind" "Temp" "Month" "Day"
# Lower-case every column name; the reshaping calls below use these names.
colnames(airquality) <- tolower(colnames(airquality))
colnames(airquality)
## [1] "ozone" "solar.r" "wind" "temp" "month" "day"
# Reshape airquality to long format: month and day identify each observation,
# and every other column becomes a (variable, value) pair. Rows whose value is
# NA are dropped. The identifier argument is spelled out as `id.vars` instead
# of relying on partial matching of `id`.
aqm <- melt(airquality, id.vars = c("month", "day"), na.rm = TRUE)
aqm
## month day variable value
## 1 5 1 ozone 41.0
## 2 5 2 ozone 36.0
## 3 5 3 ozone 12.0
## 4 5 4 ozone 18.0
## 5 5 6 ozone 28.0
## 6 5 7 ozone 23.0
## 7 5 8 ozone 19.0
## 8 5 9 ozone 8.0
## 9 5 11 ozone 7.0
## 10 5 12 ozone 16.0
## 11 5 13 ozone 11.0
## 12 5 14 ozone 14.0
## 13 5 15 ozone 18.0
## 14 5 16 ozone 14.0
## 15 5 17 ozone 34.0
## 16 5 18 ozone 6.0
## 17 5 19 ozone 30.0
## 18 5 20 ozone 11.0
## 19 5 21 ozone 1.0
## 20 5 22 ozone 11.0
## 21 5 23 ozone 4.0
## 22 5 24 ozone 32.0
## 23 5 28 ozone 23.0
## 24 5 29 ozone 45.0
## 25 5 30 ozone 115.0
## 26 5 31 ozone 37.0
## 27 6 7 ozone 29.0
## 28 6 9 ozone 71.0
## 29 6 10 ozone 39.0
## 30 6 13 ozone 23.0
## 31 6 16 ozone 21.0
## 32 6 17 ozone 37.0
## 33 6 18 ozone 20.0
## 34 6 19 ozone 12.0
## 35 6 20 ozone 13.0
## 36 7 1 ozone 135.0
## 37 7 2 ozone 49.0
## 38 7 3 ozone 32.0
## 39 7 5 ozone 64.0
## 40 7 6 ozone 40.0
## 41 7 7 ozone 77.0
## 42 7 8 ozone 97.0
## 43 7 9 ozone 97.0
## 44 7 10 ozone 85.0
## 45 7 12 ozone 10.0
## 46 7 13 ozone 27.0
## 47 7 15 ozone 7.0
## 48 7 16 ozone 48.0
## 49 7 17 ozone 35.0
## 50 7 18 ozone 61.0
## 51 7 19 ozone 79.0
## 52 7 20 ozone 63.0
## 53 7 21 ozone 16.0
## 54 7 24 ozone 80.0
## 55 7 25 ozone 108.0
## 56 7 26 ozone 20.0
## 57 7 27 ozone 52.0
## 58 7 28 ozone 82.0
## 59 7 29 ozone 50.0
## 60 7 30 ozone 64.0
## 61 7 31 ozone 59.0
## 62 8 1 ozone 39.0
## 63 8 2 ozone 9.0
## 64 8 3 ozone 16.0
## 65 8 4 ozone 78.0
## 66 8 5 ozone 35.0
## 67 8 6 ozone 66.0
## 68 8 7 ozone 122.0
## 69 8 8 ozone 89.0
## 70 8 9 ozone 110.0
## 71 8 12 ozone 44.0
## 72 8 13 ozone 28.0
## 73 8 14 ozone 65.0
## 74 8 16 ozone 22.0
## 75 8 17 ozone 59.0
## 76 8 18 ozone 23.0
## 77 8 19 ozone 31.0
## 78 8 20 ozone 44.0
## 79 8 21 ozone 21.0
## 80 8 22 ozone 9.0
## 81 8 24 ozone 45.0
## 82 8 25 ozone 168.0
## 83 8 26 ozone 73.0
## 84 8 28 ozone 76.0
## 85 8 29 ozone 118.0
## 86 8 30 ozone 84.0
## 87 8 31 ozone 85.0
## 88 9 1 ozone 96.0
## 89 9 2 ozone 78.0
## 90 9 3 ozone 73.0
## 91 9 4 ozone 91.0
## 92 9 5 ozone 47.0
## 93 9 6 ozone 32.0
## 94 9 7 ozone 20.0
## 95 9 8 ozone 23.0
## 96 9 9 ozone 21.0
## 97 9 10 ozone 24.0
## 98 9 11 ozone 44.0
## 99 9 12 ozone 21.0
## 100 9 13 ozone 28.0
## 101 9 14 ozone 9.0
## 102 9 15 ozone 13.0
## 103 9 16 ozone 46.0
## 104 9 17 ozone 18.0
## 105 9 18 ozone 13.0
## 106 9 19 ozone 24.0
## 107 9 20 ozone 16.0
## 108 9 21 ozone 13.0
## 109 9 22 ozone 23.0
## 110 9 23 ozone 36.0
## 111 9 24 ozone 7.0
## 112 9 25 ozone 14.0
## 113 9 26 ozone 30.0
## 114 9 28 ozone 14.0
## 115 9 29 ozone 18.0
## 116 9 30 ozone 20.0
## 117 5 1 solar.r 190.0
## 118 5 2 solar.r 118.0
## 119 5 3 solar.r 149.0
## 120 5 4 solar.r 313.0
## 121 5 7 solar.r 299.0
## 122 5 8 solar.r 99.0
## 123 5 9 solar.r 19.0
## 124 5 10 solar.r 194.0
## 125 5 12 solar.r 256.0
## 126 5 13 solar.r 290.0
## 127 5 14 solar.r 274.0
## 128 5 15 solar.r 65.0
## 129 5 16 solar.r 334.0
## 130 5 17 solar.r 307.0
## 131 5 18 solar.r 78.0
## 132 5 19 solar.r 322.0
## 133 5 20 solar.r 44.0
## 134 5 21 solar.r 8.0
## 135 5 22 solar.r 320.0
## 136 5 23 solar.r 25.0
## 137 5 24 solar.r 92.0
## 138 5 25 solar.r 66.0
## 139 5 26 solar.r 266.0
## 140 5 28 solar.r 13.0
## 141 5 29 solar.r 252.0
## 142 5 30 solar.r 223.0
## 143 5 31 solar.r 279.0
## 144 6 1 solar.r 286.0
## 145 6 2 solar.r 287.0
## 146 6 3 solar.r 242.0
## 147 6 4 solar.r 186.0
## 148 6 5 solar.r 220.0
## 149 6 6 solar.r 264.0
## 150 6 7 solar.r 127.0
## 151 6 8 solar.r 273.0
## 152 6 9 solar.r 291.0
## 153 6 10 solar.r 323.0
## 154 6 11 solar.r 259.0
## 155 6 12 solar.r 250.0
## 156 6 13 solar.r 148.0
## 157 6 14 solar.r 332.0
## 158 6 15 solar.r 322.0
## 159 6 16 solar.r 191.0
## 160 6 17 solar.r 284.0
## 161 6 18 solar.r 37.0
## 162 6 19 solar.r 120.0
## 163 6 20 solar.r 137.0
## 164 6 21 solar.r 150.0
## 165 6 22 solar.r 59.0
## 166 6 23 solar.r 91.0
## 167 6 24 solar.r 250.0
## 168 6 25 solar.r 135.0
## 169 6 26 solar.r 127.0
## 170 6 27 solar.r 47.0
## 171 6 28 solar.r 98.0
## 172 6 29 solar.r 31.0
## 173 6 30 solar.r 138.0
## 174 7 1 solar.r 269.0
## 175 7 2 solar.r 248.0
## 176 7 3 solar.r 236.0
## 177 7 4 solar.r 101.0
## 178 7 5 solar.r 175.0
## 179 7 6 solar.r 314.0
## 180 7 7 solar.r 276.0
## 181 7 8 solar.r 267.0
## 182 7 9 solar.r 272.0
## 183 7 10 solar.r 175.0
## 184 7 11 solar.r 139.0
## 185 7 12 solar.r 264.0
## 186 7 13 solar.r 175.0
## 187 7 14 solar.r 291.0
## 188 7 15 solar.r 48.0
## 189 7 16 solar.r 260.0
## 190 7 17 solar.r 274.0
## 191 7 18 solar.r 285.0
## 192 7 19 solar.r 187.0
## 193 7 20 solar.r 220.0
## 194 7 21 solar.r 7.0
## 195 7 22 solar.r 258.0
## 196 7 23 solar.r 295.0
## 197 7 24 solar.r 294.0
## 198 7 25 solar.r 223.0
## 199 7 26 solar.r 81.0
## 200 7 27 solar.r 82.0
## 201 7 28 solar.r 213.0
## 202 7 29 solar.r 275.0
## 203 7 30 solar.r 253.0
## 204 7 31 solar.r 254.0
## 205 8 1 solar.r 83.0
## 206 8 2 solar.r 24.0
## 207 8 3 solar.r 77.0
## 208 8 7 solar.r 255.0
## 209 8 8 solar.r 229.0
## 210 8 9 solar.r 207.0
## 211 8 10 solar.r 222.0
## 212 8 11 solar.r 137.0
## 213 8 12 solar.r 192.0
## 214 8 13 solar.r 273.0
## 215 8 14 solar.r 157.0
## 216 8 15 solar.r 64.0
## 217 8 16 solar.r 71.0
## 218 8 17 solar.r 51.0
## 219 8 18 solar.r 115.0
## 220 8 19 solar.r 244.0
## 221 8 20 solar.r 190.0
## 222 8 21 solar.r 259.0
## 223 8 22 solar.r 36.0
## 224 8 23 solar.r 255.0
## 225 8 24 solar.r 212.0
## 226 8 25 solar.r 238.0
## 227 8 26 solar.r 215.0
## 228 8 27 solar.r 153.0
## 229 8 28 solar.r 203.0
## 230 8 29 solar.r 225.0
## 231 8 30 solar.r 237.0
## 232 8 31 solar.r 188.0
## 233 9 1 solar.r 167.0
## 234 9 2 solar.r 197.0
## 235 9 3 solar.r 183.0
## 236 9 4 solar.r 189.0
## 237 9 5 solar.r 95.0
## 238 9 6 solar.r 92.0
## 239 9 7 solar.r 252.0
## 240 9 8 solar.r 220.0
## 241 9 9 solar.r 230.0
## 242 9 10 solar.r 259.0
## 243 9 11 solar.r 236.0
## 244 9 12 solar.r 259.0
## 245 9 13 solar.r 238.0
## 246 9 14 solar.r 24.0
## 247 9 15 solar.r 112.0
## 248 9 16 solar.r 237.0
## 249 9 17 solar.r 224.0
## 250 9 18 solar.r 27.0
## 251 9 19 solar.r 238.0
## 252 9 20 solar.r 201.0
## 253 9 21 solar.r 238.0
## 254 9 22 solar.r 14.0
## 255 9 23 solar.r 139.0
## 256 9 24 solar.r 49.0
## 257 9 25 solar.r 20.0
## 258 9 26 solar.r 193.0
## 259 9 27 solar.r 145.0
## 260 9 28 solar.r 191.0
## 261 9 29 solar.r 131.0
## 262 9 30 solar.r 223.0
## 263 5 1 wind 7.4
## 264 5 2 wind 8.0
## 265 5 3 wind 12.6
## 266 5 4 wind 11.5
## 267 5 5 wind 14.3
## 268 5 6 wind 14.9
## 269 5 7 wind 8.6
## 270 5 8 wind 13.8
## 271 5 9 wind 20.1
## 272 5 10 wind 8.6
## 273 5 11 wind 6.9
## 274 5 12 wind 9.7
## 275 5 13 wind 9.2
## 276 5 14 wind 10.9
## 277 5 15 wind 13.2
## 278 5 16 wind 11.5
## 279 5 17 wind 12.0
## 280 5 18 wind 18.4
## 281 5 19 wind 11.5
## 282 5 20 wind 9.7
## 283 5 21 wind 9.7
## 284 5 22 wind 16.6
## 285 5 23 wind 9.7
## 286 5 24 wind 12.0
## 287 5 25 wind 16.6
## 288 5 26 wind 14.9
## 289 5 27 wind 8.0
## 290 5 28 wind 12.0
## 291 5 29 wind 14.9
## 292 5 30 wind 5.7
## 293 5 31 wind 7.4
## 294 6 1 wind 8.6
## 295 6 2 wind 9.7
## 296 6 3 wind 16.1
## 297 6 4 wind 9.2
## 298 6 5 wind 8.6
## 299 6 6 wind 14.3
## 300 6 7 wind 9.7
## 301 6 8 wind 6.9
## 302 6 9 wind 13.8
## 303 6 10 wind 11.5
## 304 6 11 wind 10.9
## 305 6 12 wind 9.2
## 306 6 13 wind 8.0
## 307 6 14 wind 13.8
## 308 6 15 wind 11.5
## 309 6 16 wind 14.9
## 310 6 17 wind 20.7
## 311 6 18 wind 9.2
## 312 6 19 wind 11.5
## 313 6 20 wind 10.3
## 314 6 21 wind 6.3
## 315 6 22 wind 1.7
## 316 6 23 wind 4.6
## 317 6 24 wind 6.3
## 318 6 25 wind 8.0
## 319 6 26 wind 8.0
## 320 6 27 wind 10.3
## 321 6 28 wind 11.5
## 322 6 29 wind 14.9
## 323 6 30 wind 8.0
## 324 7 1 wind 4.1
## 325 7 2 wind 9.2
## 326 7 3 wind 9.2
## 327 7 4 wind 10.9
## 328 7 5 wind 4.6
## 329 7 6 wind 10.9
## 330 7 7 wind 5.1
## 331 7 8 wind 6.3
## 332 7 9 wind 5.7
## 333 7 10 wind 7.4
## 334 7 11 wind 8.6
## 335 7 12 wind 14.3
## 336 7 13 wind 14.9
## 337 7 14 wind 14.9
## 338 7 15 wind 14.3
## 339 7 16 wind 6.9
## 340 7 17 wind 10.3
## 341 7 18 wind 6.3
## 342 7 19 wind 5.1
## 343 7 20 wind 11.5
## 344 7 21 wind 6.9
## 345 7 22 wind 9.7
## 346 7 23 wind 11.5
## 347 7 24 wind 8.6
## 348 7 25 wind 8.0
## 349 7 26 wind 8.6
## 350 7 27 wind 12.0
## 351 7 28 wind 7.4
## 352 7 29 wind 7.4
## 353 7 30 wind 7.4
## 354 7 31 wind 9.2
## 355 8 1 wind 6.9
## 356 8 2 wind 13.8
## 357 8 3 wind 7.4
## 358 8 4 wind 6.9
## 359 8 5 wind 7.4
## 360 8 6 wind 4.6
## 361 8 7 wind 4.0
## 362 8 8 wind 10.3
## 363 8 9 wind 8.0
## 364 8 10 wind 8.6
## 365 8 11 wind 11.5
## 366 8 12 wind 11.5
## 367 8 13 wind 11.5
## 368 8 14 wind 9.7
## 369 8 15 wind 11.5
## 370 8 16 wind 10.3
## 371 8 17 wind 6.3
## 372 8 18 wind 7.4
## 373 8 19 wind 10.9
## 374 8 20 wind 10.3
## 375 8 21 wind 15.5
## 376 8 22 wind 14.3
## 377 8 23 wind 12.6
## 378 8 24 wind 9.7
## 379 8 25 wind 3.4
## 380 8 26 wind 8.0
## 381 8 27 wind 5.7
## 382 8 28 wind 9.7
## 383 8 29 wind 2.3
## 384 8 30 wind 6.3
## 385 8 31 wind 6.3
## 386 9 1 wind 6.9
## 387 9 2 wind 5.1
## 388 9 3 wind 2.8
## 389 9 4 wind 4.6
## 390 9 5 wind 7.4
## 391 9 6 wind 15.5
## 392 9 7 wind 10.9
## 393 9 8 wind 10.3
## 394 9 9 wind 10.9
## 395 9 10 wind 9.7
## 396 9 11 wind 14.9
## 397 9 12 wind 15.5
## 398 9 13 wind 6.3
## 399 9 14 wind 10.9
## 400 9 15 wind 11.5
## 401 9 16 wind 6.9
## 402 9 17 wind 13.8
## 403 9 18 wind 10.3
## 404 9 19 wind 10.3
## 405 9 20 wind 8.0
## 406 9 21 wind 12.6
## 407 9 22 wind 9.2
## 408 9 23 wind 10.3
## 409 9 24 wind 10.3
## 410 9 25 wind 16.6
## 411 9 26 wind 6.9
## 412 9 27 wind 13.2
## 413 9 28 wind 14.3
## 414 9 29 wind 8.0
## 415 9 30 wind 11.5
## 416 5 1 temp 67.0
## 417 5 2 temp 72.0
## 418 5 3 temp 74.0
## 419 5 4 temp 62.0
## 420 5 5 temp 56.0
## 421 5 6 temp 66.0
## 422 5 7 temp 65.0
## 423 5 8 temp 59.0
## 424 5 9 temp 61.0
## 425 5 10 temp 69.0
## 426 5 11 temp 74.0
## 427 5 12 temp 69.0
## 428 5 13 temp 66.0
## 429 5 14 temp 68.0
## 430 5 15 temp 58.0
## 431 5 16 temp 64.0
## 432 5 17 temp 66.0
## 433 5 18 temp 57.0
## 434 5 19 temp 68.0
## 435 5 20 temp 62.0
## 436 5 21 temp 59.0
## 437 5 22 temp 73.0
## 438 5 23 temp 61.0
## 439 5 24 temp 61.0
## 440 5 25 temp 57.0
## 441 5 26 temp 58.0
## 442 5 27 temp 57.0
## 443 5 28 temp 67.0
## 444 5 29 temp 81.0
## 445 5 30 temp 79.0
## 446 5 31 temp 76.0
## 447 6 1 temp 78.0
## 448 6 2 temp 74.0
## 449 6 3 temp 67.0
## 450 6 4 temp 84.0
## 451 6 5 temp 85.0
## 452 6 6 temp 79.0
## 453 6 7 temp 82.0
## 454 6 8 temp 87.0
## 455 6 9 temp 90.0
## 456 6 10 temp 87.0
## 457 6 11 temp 93.0
## 458 6 12 temp 92.0
## 459 6 13 temp 82.0
## 460 6 14 temp 80.0
## 461 6 15 temp 79.0
## 462 6 16 temp 77.0
## 463 6 17 temp 72.0
## 464 6 18 temp 65.0
## 465 6 19 temp 73.0
## 466 6 20 temp 76.0
## 467 6 21 temp 77.0
## 468 6 22 temp 76.0
## 469 6 23 temp 76.0
## 470 6 24 temp 76.0
## 471 6 25 temp 75.0
## 472 6 26 temp 78.0
## 473 6 27 temp 73.0
## 474 6 28 temp 80.0
## 475 6 29 temp 77.0
## 476 6 30 temp 83.0
## 477 7 1 temp 84.0
## 478 7 2 temp 85.0
## 479 7 3 temp 81.0
## 480 7 4 temp 84.0
## 481 7 5 temp 83.0
## 482 7 6 temp 83.0
## 483 7 7 temp 88.0
## 484 7 8 temp 92.0
## 485 7 9 temp 92.0
## 486 7 10 temp 89.0
## 487 7 11 temp 82.0
## 488 7 12 temp 73.0
## 489 7 13 temp 81.0
## 490 7 14 temp 91.0
## 491 7 15 temp 80.0
## 492 7 16 temp 81.0
## 493 7 17 temp 82.0
## 494 7 18 temp 84.0
## 495 7 19 temp 87.0
## 496 7 20 temp 85.0
## 497 7 21 temp 74.0
## 498 7 22 temp 81.0
## 499 7 23 temp 82.0
## 500 7 24 temp 86.0
## 501 7 25 temp 85.0
## 502 7 26 temp 82.0
## 503 7 27 temp 86.0
## 504 7 28 temp 88.0
## 505 7 29 temp 86.0
## 506 7 30 temp 83.0
## 507 7 31 temp 81.0
## 508 8 1 temp 81.0
## 509 8 2 temp 81.0
## 510 8 3 temp 82.0
## 511 8 4 temp 86.0
## 512 8 5 temp 85.0
## 513 8 6 temp 87.0
## 514 8 7 temp 89.0
## 515 8 8 temp 90.0
## 516 8 9 temp 90.0
## 517 8 10 temp 92.0
## 518 8 11 temp 86.0
## 519 8 12 temp 86.0
## 520 8 13 temp 82.0
## 521 8 14 temp 80.0
## 522 8 15 temp 79.0
## 523 8 16 temp 77.0
## 524 8 17 temp 79.0
## 525 8 18 temp 76.0
## 526 8 19 temp 78.0
## 527 8 20 temp 78.0
## 528 8 21 temp 77.0
## 529 8 22 temp 72.0
## 530 8 23 temp 75.0
## 531 8 24 temp 79.0
## 532 8 25 temp 81.0
## 533 8 26 temp 86.0
## 534 8 27 temp 88.0
## 535 8 28 temp 97.0
## 536 8 29 temp 94.0
## 537 8 30 temp 96.0
## 538 8 31 temp 94.0
## 539 9 1 temp 91.0
## 540 9 2 temp 92.0
## 541 9 3 temp 93.0
## 542 9 4 temp 93.0
## 543 9 5 temp 87.0
## 544 9 6 temp 84.0
## 545 9 7 temp 80.0
## 546 9 8 temp 78.0
## 547 9 9 temp 75.0
## 548 9 10 temp 73.0
## 549 9 11 temp 81.0
## 550 9 12 temp 76.0
## 551 9 13 temp 77.0
## 552 9 14 temp 71.0
## 553 9 15 temp 71.0
## 554 9 16 temp 78.0
## 555 9 17 temp 67.0
## 556 9 18 temp 76.0
## 557 9 19 temp 68.0
## 558 9 20 temp 82.0
## 559 9 21 temp 64.0
## 560 9 22 temp 71.0
## 561 9 23 temp 81.0
## 562 9 24 temp 69.0
## 563 9 25 temp 63.0
## 564 9 26 temp 70.0
## 565 9 27 temp 77.0
## 566 9 28 temp 75.0
## 567 9 29 temp 76.0
## 568 9 30 temp 68.0
# Cast the long data into a three-way array: day x month x variable.
a <- cast(aqm, formula = day ~ month ~ variable)
a
## , , variable = ozone
##
## month
## day 5 6 7 8 9
## 1 41 NA 135 39 96
## 2 36 NA 49 9 78
## 3 12 NA 32 16 73
## 4 18 NA NA 78 91
## 5 NA NA 64 35 47
## 6 28 NA 40 66 32
## 7 23 29 77 122 20
## 8 19 NA 97 89 23
## 9 8 71 97 110 21
## 10 NA 39 85 NA 24
## 11 7 NA NA NA 44
## 12 16 NA 10 44 21
## 13 11 23 27 28 28
## 14 14 NA NA 65 9
## 15 18 NA 7 NA 13
## 16 14 21 48 22 46
## 17 34 37 35 59 18
## 18 6 20 61 23 13
## 19 30 12 79 31 24
## 20 11 13 63 44 16
## 21 1 NA 16 21 13
## 22 11 NA NA 9 23
## 23 4 NA NA NA 36
## 24 32 NA 80 45 7
## 25 NA NA 108 168 14
## 26 NA NA 20 73 30
## 27 NA NA 52 NA NA
## 28 23 NA 82 76 14
## 29 45 NA 50 118 18
## 30 115 NA 64 84 20
## 31 37 NA 59 85 NA
##
## , , variable = solar.r
##
## month
## day 5 6 7 8 9
## 1 190 286 269 83 167
## 2 118 287 248 24 197
## 3 149 242 236 77 183
## 4 313 186 101 NA 189
## 5 NA 220 175 NA 95
## 6 NA 264 314 NA 92
## 7 299 127 276 255 252
## 8 99 273 267 229 220
## 9 19 291 272 207 230
## 10 194 323 175 222 259
## 11 NA 259 139 137 236
## 12 256 250 264 192 259
## 13 290 148 175 273 238
## 14 274 332 291 157 24
## 15 65 322 48 64 112
## 16 334 191 260 71 237
## 17 307 284 274 51 224
## 18 78 37 285 115 27
## 19 322 120 187 244 238
## 20 44 137 220 190 201
## 21 8 150 7 259 238
## 22 320 59 258 36 14
## 23 25 91 295 255 139
## 24 92 250 294 212 49
## 25 66 135 223 238 20
## 26 266 127 81 215 193
## 27 NA 47 82 153 145
## 28 13 98 213 203 191
## 29 252 31 275 225 131
## 30 223 138 253 237 223
## 31 279 NA 254 188 NA
##
## , , variable = wind
##
## month
## day 5 6 7 8 9
## 1 7.4 8.6 4.1 6.9 6.9
## 2 8.0 9.7 9.2 13.8 5.1
## 3 12.6 16.1 9.2 7.4 2.8
## 4 11.5 9.2 10.9 6.9 4.6
## 5 14.3 8.6 4.6 7.4 7.4
## 6 14.9 14.3 10.9 4.6 15.5
## 7 8.6 9.7 5.1 4.0 10.9
## 8 13.8 6.9 6.3 10.3 10.3
## 9 20.1 13.8 5.7 8.0 10.9
## 10 8.6 11.5 7.4 8.6 9.7
## 11 6.9 10.9 8.6 11.5 14.9
## 12 9.7 9.2 14.3 11.5 15.5
## 13 9.2 8.0 14.9 11.5 6.3
## 14 10.9 13.8 14.9 9.7 10.9
## 15 13.2 11.5 14.3 11.5 11.5
## 16 11.5 14.9 6.9 10.3 6.9
## 17 12.0 20.7 10.3 6.3 13.8
## 18 18.4 9.2 6.3 7.4 10.3
## 19 11.5 11.5 5.1 10.9 10.3
## 20 9.7 10.3 11.5 10.3 8.0
## 21 9.7 6.3 6.9 15.5 12.6
## 22 16.6 1.7 9.7 14.3 9.2
## 23 9.7 4.6 11.5 12.6 10.3
## 24 12.0 6.3 8.6 9.7 10.3
## 25 16.6 8.0 8.0 3.4 16.6
## 26 14.9 8.0 8.6 8.0 6.9
## 27 8.0 10.3 12.0 5.7 13.2
## 28 12.0 11.5 7.4 9.7 14.3
## 29 14.9 14.9 7.4 2.3 8.0
## 30 5.7 8.0 7.4 6.3 11.5
## 31 7.4 NA 9.2 6.3 NA
##
## , , variable = temp
##
## month
## day 5 6 7 8 9
## 1 67 78 84 81 91
## 2 72 74 85 81 92
## 3 74 67 81 82 93
## 4 62 84 84 86 93
## 5 56 85 83 85 87
## 6 66 79 83 87 84
## 7 65 82 88 89 80
## 8 59 87 92 90 78
## 9 61 90 92 90 75
## 10 69 87 89 92 73
## 11 74 93 82 86 81
## 12 69 92 73 86 76
## 13 66 82 81 82 77
## 14 68 80 91 80 71
## 15 58 79 80 79 71
## 16 64 77 81 77 78
## 17 66 72 82 79 67
## 18 57 65 84 76 76
## 19 68 73 87 78 68
## 20 62 76 85 78 82
## 21 59 77 74 77 64
## 22 73 76 81 72 71
## 23 61 76 82 75 81
## 24 61 76 86 79 69
## 25 57 75 85 81 63
## 26 58 78 82 86 70
## 27 57 73 86 88 77
## 28 67 80 88 97 75
## 29 81 77 86 94 76
## 30 79 83 83 96 68
## 31 76 NA 81 94 NA
# Monthly mean of every measured variable (one row per month).
b <- cast(aqm, formula = month ~ variable, fun.aggregate = mean)
b
## month ozone solar.r wind temp
## 1 5 23.61538 181.2963 11.622581 65.54839
## 2 6 29.44444 190.1667 10.266667 79.10000
## 3 7 59.11538 216.4839 8.941935 83.90323
## 4 8 59.96154 171.8571 8.793548 83.96774
## 5 9 31.44828 167.4333 10.180000 76.90000
# Monthly means again, returned as a list with one table per variable.
c <- cast(aqm, formula = month ~ . | variable, fun.aggregate = mean)
c
## $ozone
## month (all)
## 1 5 23.61538
## 2 6 29.44444
## 3 7 59.11538
## 4 8 59.96154
## 5 9 31.44828
##
## $solar.r
## month (all)
## 1 5 181.2963
## 2 6 190.1667
## 3 7 216.4839
## 4 8 171.8571
## 5 9 167.4333
##
## $wind
## month (all)
## 1 5 11.622581
## 2 6 10.266667
## 3 7 8.941935
## 4 8 8.793548
## 5 9 10.180000
##
## $temp
## month (all)
## 1 5 65.54839
## 2 6 79.10000
## 3 7 83.90323
## 4 8 83.96774
## 5 9 76.90000
# Monthly means with an extra "(all)" row and column holding the margins.
d <- cast(
  aqm,
  formula = month ~ variable,
  fun.aggregate = mean,
  margins = c("grand_row", "grand_col")
)
d
## month ozone solar.r wind temp (all)
## 1 5 23.61538 181.2963 11.622581 65.54839 68.70696
## 2 6 29.44444 190.1667 10.266667 79.10000 87.38384
## 3 7 59.11538 216.4839 8.941935 83.90323 93.49748
## 4 8 59.96154 171.8571 8.793548 83.96774 79.71207
## 5 9 31.44828 167.4333 10.180000 76.90000 71.82689
## 6 (all) 42.12931 185.9315 9.957516 77.88235 80.05722
# Day-by-month table restricted to the ozone measurements.
e <- cast(
  aqm,
  formula = day ~ month,
  fun.aggregate = mean,
  subset = variable == "ozone"
)
e
## day 5 6 7 8 9
## 1 1 41 NaN 135 39 96
## 2 2 36 NaN 49 9 78
## 3 3 12 NaN 32 16 73
## 4 4 18 NaN NaN 78 91
## 5 5 NaN NaN 64 35 47
## 6 6 28 NaN 40 66 32
## 7 7 23 29 77 122 20
## 8 8 19 NaN 97 89 23
## 9 9 8 71 97 110 21
## 10 10 NaN 39 85 NaN 24
## 11 11 7 NaN NaN NaN 44
## 12 12 16 NaN 10 44 21
## 13 13 11 23 27 28 28
## 14 14 14 NaN NaN 65 9
## 15 15 18 NaN 7 NaN 13
## 16 16 14 21 48 22 46
## 17 17 34 37 35 59 18
## 18 18 6 20 61 23 13
## 19 19 30 12 79 31 24
## 20 20 11 13 63 44 16
## 21 21 1 NaN 16 21 13
## 22 22 11 NaN NaN 9 23
## 23 23 4 NaN NaN NaN 36
## 24 24 32 NaN 80 45 7
## 25 25 NaN NaN 108 168 14
## 26 26 NaN NaN 20 73 30
## 27 27 NaN NaN 52 NaN NaN
## 28 28 23 NaN 82 76 14
## 29 29 45 NaN 50 118 18
## 30 30 115 NaN 64 84 20
## 31 31 37 NaN 59 85 NaN
# Monthly minimum and maximum of every variable; range() yields two values,
# so each variable expands into an _X1 (min) and _X2 (max) column.
f <- cast(aqm, formula = month ~ variable, fun.aggregate = range)
f
## month ozone_X1 ozone_X2 solar.r_X1 solar.r_X2 wind_X1 wind_X2 temp_X1 temp_X2
## 1 5 1 115 8 334 5.7 20.1 56 81
## 2 6 12 71 31 332 1.7 20.7 65 93
## 3 7 7 135 7 314 4.1 14.9 73 92
## 4 8 9 168 24 273 2.3 15.5 72 97
## 5 9 7 96 14 259 2.8 16.6 63 93
library(sqldf)
## 필요한 패키지를 로딩중입니다: gsubfn
## 필요한 패키지를 로딩중입니다: proto
## 필요한 패키지를 로딩중입니다: RSQLite
search()
## [1] ".GlobalEnv" "package:sqldf" "package:RSQLite"
## [4] "package:gsubfn" "package:proto" "package:reshape"
## [7] "package:stats" "package:graphics" "package:grDevices"
## [10] "package:utils" "package:datasets" "package:methods"
## [13] "Autoloads" "package:base"
iris : 아이리스(붓꽃)에 대한 데이터. 꽃잎 각 부분의 너비와 길이등을 측정한 데이터이며 150개의 레코드로 구성되어 있음
Sepal.Length : 꽃받침의 길이 정보
Sepal.Width : 꽃받침의 너비 정보
Petal.Length : 꽃잎의 길이 정보
Petal.Width : 꽃잎의 너비 정보
Species : 꽃의 종류 정보
# Load the built-in iris data set and preview its first six rows.
data(iris)
head(iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
# Query the data frame with SQL: return every row and column of iris.
sqldf("select * from iris")
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
## 7 4.6 3.4 1.4 0.3 setosa
## 8 5.0 3.4 1.5 0.2 setosa
## 9 4.4 2.9 1.4 0.2 setosa
## 10 4.9 3.1 1.5 0.1 setosa
## 11 5.4 3.7 1.5 0.2 setosa
## 12 4.8 3.4 1.6 0.2 setosa
## 13 4.8 3.0 1.4 0.1 setosa
## 14 4.3 3.0 1.1 0.1 setosa
## 15 5.8 4.0 1.2 0.2 setosa
## 16 5.7 4.4 1.5 0.4 setosa
## 17 5.4 3.9 1.3 0.4 setosa
## 18 5.1 3.5 1.4 0.3 setosa
## 19 5.7 3.8 1.7 0.3 setosa
## 20 5.1 3.8 1.5 0.3 setosa
## 21 5.4 3.4 1.7 0.2 setosa
## 22 5.1 3.7 1.5 0.4 setosa
## 23 4.6 3.6 1.0 0.2 setosa
## 24 5.1 3.3 1.7 0.5 setosa
## 25 4.8 3.4 1.9 0.2 setosa
## 26 5.0 3.0 1.6 0.2 setosa
## 27 5.0 3.4 1.6 0.4 setosa
## 28 5.2 3.5 1.5 0.2 setosa
## 29 5.2 3.4 1.4 0.2 setosa
## 30 4.7 3.2 1.6 0.2 setosa
## 31 4.8 3.1 1.6 0.2 setosa
## 32 5.4 3.4 1.5 0.4 setosa
## 33 5.2 4.1 1.5 0.1 setosa
## 34 5.5 4.2 1.4 0.2 setosa
## 35 4.9 3.1 1.5 0.2 setosa
## 36 5.0 3.2 1.2 0.2 setosa
## 37 5.5 3.5 1.3 0.2 setosa
## 38 4.9 3.6 1.4 0.1 setosa
## 39 4.4 3.0 1.3 0.2 setosa
## 40 5.1 3.4 1.5 0.2 setosa
## 41 5.0 3.5 1.3 0.3 setosa
## 42 4.5 2.3 1.3 0.3 setosa
## 43 4.4 3.2 1.3 0.2 setosa
## 44 5.0 3.5 1.6 0.6 setosa
## 45 5.1 3.8 1.9 0.4 setosa
## 46 4.8 3.0 1.4 0.3 setosa
## 47 5.1 3.8 1.6 0.2 setosa
## 48 4.6 3.2 1.4 0.2 setosa
## 49 5.3 3.7 1.5 0.2 setosa
## 50 5.0 3.3 1.4 0.2 setosa
## 51 7.0 3.2 4.7 1.4 versicolor
## 52 6.4 3.2 4.5 1.5 versicolor
## 53 6.9 3.1 4.9 1.5 versicolor
## 54 5.5 2.3 4.0 1.3 versicolor
## 55 6.5 2.8 4.6 1.5 versicolor
## 56 5.7 2.8 4.5 1.3 versicolor
## 57 6.3 3.3 4.7 1.6 versicolor
## 58 4.9 2.4 3.3 1.0 versicolor
## 59 6.6 2.9 4.6 1.3 versicolor
## 60 5.2 2.7 3.9 1.4 versicolor
## 61 5.0 2.0 3.5 1.0 versicolor
## 62 5.9 3.0 4.2 1.5 versicolor
## 63 6.0 2.2 4.0 1.0 versicolor
## 64 6.1 2.9 4.7 1.4 versicolor
## 65 5.6 2.9 3.6 1.3 versicolor
## 66 6.7 3.1 4.4 1.4 versicolor
## 67 5.6 3.0 4.5 1.5 versicolor
## 68 5.8 2.7 4.1 1.0 versicolor
## 69 6.2 2.2 4.5 1.5 versicolor
## 70 5.6 2.5 3.9 1.1 versicolor
## 71 5.9 3.2 4.8 1.8 versicolor
## 72 6.1 2.8 4.0 1.3 versicolor
## 73 6.3 2.5 4.9 1.5 versicolor
## 74 6.1 2.8 4.7 1.2 versicolor
## 75 6.4 2.9 4.3 1.3 versicolor
## 76 6.6 3.0 4.4 1.4 versicolor
## 77 6.8 2.8 4.8 1.4 versicolor
## 78 6.7 3.0 5.0 1.7 versicolor
## 79 6.0 2.9 4.5 1.5 versicolor
## 80 5.7 2.6 3.5 1.0 versicolor
## 81 5.5 2.4 3.8 1.1 versicolor
## 82 5.5 2.4 3.7 1.0 versicolor
## 83 5.8 2.7 3.9 1.2 versicolor
## 84 6.0 2.7 5.1 1.6 versicolor
## 85 5.4 3.0 4.5 1.5 versicolor
## 86 6.0 3.4 4.5 1.6 versicolor
## 87 6.7 3.1 4.7 1.5 versicolor
## 88 6.3 2.3 4.4 1.3 versicolor
## 89 5.6 3.0 4.1 1.3 versicolor
## 90 5.5 2.5 4.0 1.3 versicolor
## 91 5.5 2.6 4.4 1.2 versicolor
## 92 6.1 3.0 4.6 1.4 versicolor
## 93 5.8 2.6 4.0 1.2 versicolor
## 94 5.0 2.3 3.3 1.0 versicolor
## 95 5.6 2.7 4.2 1.3 versicolor
## 96 5.7 3.0 4.2 1.2 versicolor
## 97 5.7 2.9 4.2 1.3 versicolor
## 98 6.2 2.9 4.3 1.3 versicolor
## 99 5.1 2.5 3.0 1.1 versicolor
## 100 5.7 2.8 4.1 1.3 versicolor
## 101 6.3 3.3 6.0 2.5 virginica
## 102 5.8 2.7 5.1 1.9 virginica
## 103 7.1 3.0 5.9 2.1 virginica
## 104 6.3 2.9 5.6 1.8 virginica
## 105 6.5 3.0 5.8 2.2 virginica
## 106 7.6 3.0 6.6 2.1 virginica
## 107 4.9 2.5 4.5 1.7 virginica
## 108 7.3 2.9 6.3 1.8 virginica
## 109 6.7 2.5 5.8 1.8 virginica
## 110 7.2 3.6 6.1 2.5 virginica
## 111 6.5 3.2 5.1 2.0 virginica
## 112 6.4 2.7 5.3 1.9 virginica
## 113 6.8 3.0 5.5 2.1 virginica
## 114 5.7 2.5 5.0 2.0 virginica
## 115 5.8 2.8 5.1 2.4 virginica
## 116 6.4 3.2 5.3 2.3 virginica
## 117 6.5 3.0 5.5 1.8 virginica
## 118 7.7 3.8 6.7 2.2 virginica
## 119 7.7 2.6 6.9 2.3 virginica
## 120 6.0 2.2 5.0 1.5 virginica
## 121 6.9 3.2 5.7 2.3 virginica
## 122 5.6 2.8 4.9 2.0 virginica
## 123 7.7 2.8 6.7 2.0 virginica
## 124 6.3 2.7 4.9 1.8 virginica
## 125 6.7 3.3 5.7 2.1 virginica
## 126 7.2 3.2 6.0 1.8 virginica
## 127 6.2 2.8 4.8 1.8 virginica
## 128 6.1 3.0 4.9 1.8 virginica
## 129 6.4 2.8 5.6 2.1 virginica
## 130 7.2 3.0 5.8 1.6 virginica
## 131 7.4 2.8 6.1 1.9 virginica
## 132 7.9 3.8 6.4 2.0 virginica
## 133 6.4 2.8 5.6 2.2 virginica
## 134 6.3 2.8 5.1 1.5 virginica
## 135 6.1 2.6 5.6 1.4 virginica
## 136 7.7 3.0 6.1 2.3 virginica
## 137 6.3 3.4 5.6 2.4 virginica
## 138 6.4 3.1 5.5 1.8 virginica
## 139 6.0 3.0 4.8 1.8 virginica
## 140 6.9 3.1 5.4 2.1 virginica
## 141 6.7 3.1 5.6 2.4 virginica
## 142 6.9 3.1 5.1 2.3 virginica
## 143 5.8 2.7 5.1 1.9 virginica
## 144 6.8 3.2 5.9 2.3 virginica
## 145 6.7 3.3 5.7 2.5 virginica
## 146 6.7 3.0 5.2 2.3 virginica
## 147 6.3 2.5 5.0 1.9 virginica
## 148 6.5 3.0 5.2 2.0 virginica
## 149 6.2 3.4 5.4 2.3 virginica
## 150 5.9 3.0 5.1 1.8 virginica
# Return only the first 10 rows of iris.
sqldf("select * from iris limit 10")
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
## 7 4.6 3.4 1.4 0.3 setosa
## 8 5.0 3.4 1.5 0.2 setosa
## 9 4.4 2.9 1.4 0.2 setosa
## 10 4.9 3.1 1.5 0.1 setosa
# Count the rows whose Species starts with "se" (the 50 setosa rows).
sqldf("select count(*) from iris where Species like 'se%'")
## count(*)
## 1 50
rm(list = ls())
library(plyr)
##
## 다음의 패키지를 부착합니다: 'plyr'
## The following objects are masked from 'package:reshape':
##
## rename, round_any
search()
## [1] ".GlobalEnv" "package:plyr" "package:sqldf"
## [4] "package:RSQLite" "package:gsubfn" "package:proto"
## [7] "package:reshape" "package:stats" "package:graphics"
## [10] "package:grDevices" "package:utils" "package:datasets"
## [13] "package:methods" "Autoloads" "package:base"
# Fix the RNG seed so the random counts below are reproducible.
set.seed(1)
# NOTE(review): `year` has 18 values but only 9 counts are drawn, so
# data.frame() recycles them (rows 10-18 of the output repeat rows 1-9).
# Confirm this is intended rather than runif(18, 0, 20).
d <- data.frame(year = rep(2012:2014, each = 6), count = round(runif(9, 0, 20)))
# runif(n, min, max): draws n uniform random numbers between min and max.
# round(): rounds each number to a whole number.
d
## year count
## 1 2012 5
## 2 2012 7
## 3 2012 11
## 4 2012 18
## 5 2012 4
## 6 2012 18
## 7 2013 19
## 8 2013 13
## 9 2013 13
## 10 2013 5
## 11 2013 7
## 12 2013 11
## 13 2014 18
## 14 2014 4
## 15 2014 18
## 16 2014 19
## 17 2014 13
## 18 2014 13
# Coefficient of variation (standard deviation / mean) of count per year.
ddply(d, "year", function(grp) {
  data.frame(cv.count = sd(grp$count) / mean(grp$count))
})
## year cv.count
## 1 2012 0.5985621
## 2 2013 0.4382254
## 3 2014 0.3978489
# summarise collapses each year to a single row: here the mean of count.
ddply(d, .variables = "year", .fun = summarise, mean.count = mean(count))
## year mean.count
## 1 2012 10.50000
## 2 2013 11.33333
## 3 2014 14.16667
# Same grouping, but reporting the total of count per year.
ddply(d, .variables = "year", .fun = summarise, total.count = sum(count))
## year total.count
## 1 2012 63
## 2 2013 68
## 3 2014 85
# transform keeps every original row and adds the per-year mean as a column.
ddply(d, .variables = "year", .fun = transform, mean.count = mean(count))
## year count mean.count
## 1 2012 5 10.50000
## 2 2012 7 10.50000
## 3 2012 11 10.50000
## 4 2012 18 10.50000
## 5 2012 4 10.50000
## 6 2012 18 10.50000
## 7 2013 19 11.33333
## 8 2013 13 11.33333
## 9 2013 13 11.33333
## 10 2013 5 11.33333
## 11 2013 7 11.33333
## 12 2013 11 11.33333
## 13 2014 18 14.16667
## 14 2014 4 14.16667
## 15 2014 18 14.16667
## 16 2014 19 14.16667
## 17 2014 13 14.16667
## 18 2014 13 14.16667
# transform keeps every original row and adds the per-year total as a column.
ddply(d, .variables = "year", .fun = transform, total.count = sum(count))
## year count total.count
## 1 2012 5 63
## 2 2012 7 63
## 3 2012 11 63
## 4 2012 18 63
## 5 2012 4 63
## 6 2012 18 63
## 7 2013 19 68
## 8 2013 13 68
## 9 2013 13 68
## 10 2013 5 68
## 11 2013 7 68
## 12 2013 11 68
## 13 2014 18 85
## 14 2014 4 85
## 15 2014 18 85
## 16 2014 19 85
## 17 2014 13 85
## 18 2014 13 85
library(data.table)
##
## 다음의 패키지를 부착합니다: 'data.table'
## The following object is masked from 'package:reshape':
##
## melt
search()
## [1] ".GlobalEnv" "package:data.table" "package:plyr"
## [4] "package:sqldf" "package:RSQLite" "package:gsubfn"
## [7] "package:proto" "package:reshape" "package:stats"
## [10] "package:graphics" "package:grDevices" "package:utils"
## [13] "package:datasets" "package:methods" "Autoloads"
## [16] "package:base"
# Reproducible five-row data.table: a group label x (three "b", two "a")
# and five normal random draws in v.
set.seed(1)
DT <- data.table(
  x = rep(c("b", "a"), times = c(3, 2)),
  v = rnorm(5)
)
DT
## x v
## 1: b -0.6264538
## 2: b 0.1836433
## 3: b -0.8356286
## 4: a 1.5952808
## 5: a 0.3295078
# rnorm() : 정규분포에서 난수 생성
data(cars)
head(cars)
## speed dist
## 1 4 2
## 2 4 10
## 3 7 4
## 4 7 22
## 5 8 16
## 6 9 10
str(cars)
## 'data.frame': 50 obs. of 2 variables:
## $ speed: num 4 4 7 7 8 9 10 10 10 11 ...
## $ dist : num 2 10 4 22 16 10 18 26 34 17 ...
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
# Convert the cars data frame into a data.table and preview it.
CARS <- data.table(cars)
head(CARS)
## speed dist
## 1: 4 2
## 2: 4 10
## 3: 7 4
## 4: 7 22
## 5: 8 16
## 6: 9 10
# List the data.tables currently in memory with their size, columns and key.
tables()
## NAME NROW NCOL MB COLS KEY
## 1: CARS 50 2 0 speed,dist
## 2: DT 5 2 0 x,v
## Total: 0MB
# Report the class of every column in each table.
sapply(CARS, class)
## speed dist
## "numeric" "numeric"
sapply(DT, class)
## x v
## "character" "numeric"
# sapply(data, function): applies function to each element (here, each column) of data and returns the results
# Print the table before any key is set (rows are in creation order).
DT
## x v
## 1: b -0.6264538
## 2: b 0.1836433
## 3: b -0.8356286
## 4: a 1.5952808
## 5: a 0.3295078
# Select the second row by position.
DT[2,]
## x v
## 1: b 0.1836433
# Select rows by comparing the whole x column against "b".
DT[DT$x == "b", ]
## x v
## 1: b -0.6264538
## 2: b 0.1836433
## 3: b -0.8356286
# Set x as the key of the table.
setkey(DT, x)
DT
## x v
## 1: a 1.5952808
## 2: a 0.3295078
## 3: b -0.6264538
## 4: b 0.1836433
## 5: b -0.8356286
tables()
## NAME NROW NCOL MB COLS KEY
## 1: CARS 50 2 0 speed,dist
## 2: DT 5 2 0 x,v x
## Total: 0MB
# The table is now sorted by the key column x.
# With a key set, a bare value in i selects the rows whose key equals it.
DT["b"]
## x v
## 1: b -0.6264538
## 2: b 0.1836433
## 3: b -0.8356286
# mult = "first" keeps only the first matching row.
DT["b", mult="first"]
## x v
## 1: b -0.6264538
# mult = "last" keeps only the last matching row.
DT["b", mult="last"]
## x v
## 1: b -0.8356286
# Build a large benchmark data frame and time its construction.
# x repeats each upper-case letter 26 * grpsize times; y repeats each
# lower-case letter grpsize times and is recycled to the full length;
# v holds one uniform random number per row.
grpsize <- ceiling(1e7/26^2) # about ten million rows in 676 (26^2) groups
tt <- system.time(DF <- data.frame(
x = rep(LETTERS, each=26*grpsize),
y = rep(letters, each=grpsize),
v = runif(grpsize*26^2),
stringsAsFactors = FALSE))
tt
## 사용자 시스템 elapsed
## 0.53 0.02 0.55
head(DF, 3) #앞 3줄 조회
## x y v
## 1 A a 0.2059746
## 2 A a 0.1765568
## 3 A a 0.6870228
tail(DF, 3) #뒤 3줄 조회
## x y v
## 10000066 Z z 0.5775638
## 10000067 Z z 0.9617555
## 10000068 Z z 0.3814378
dim(DF) #행, 열 개수 조회
## [1] 10000068 3
# Time a data.frame lookup: both columns are compared row by row over the
# whole table to find the x == "R", y == "h" group.
tt <- system.time(ans1 <- DF[DF$x=="R"& DF$y == "h", ]) # vector scan
tt
## 사용자 시스템 elapsed
## 0.08 0.01 0.09
head(ans1, 3)
## x y v
## 6642058 R h 0.9137772
## 6642059 R h 0.5687812
## 6642060 R h 0.1304896
dim(ans1)
## [1] 14793 3
# Convert the data frame to a data.table keyed on (x, y), then time the same
# lookup through the key.
DT <- data.table(DF)
setkey(DT, x, y)
ss <- system.time(ans2 <- DT[J("R", "h")]) # binary search
head(ans2, 3)
## x y v
## 1: R h 0.9137772
## 2: R h 0.5687812
## 3: R h 0.1304896
dim(ans2)
## [1] 14793 3
identical(ans1$v, ans2$v)
## [1] TRUE
ss
## 사용자 시스템 elapsed
## 0 0 0
# Time the data.frame vector scan once more. The result is stored in `ans1`
# so that `ans2` keeps the keyed data.table result from DT[J("R", "h")];
# storing it in `ans2` would make the check below compare the data.frame
# subset with itself.
system.time(ans1 <- DF[DF$x == "R" & DF$y == "h", ])
## 사용자 시스템 elapsed
## 0.09 0.00 0.09
# Column-by-column check that both search methods returned the same data.
mapply(identical, ans1, ans2)
## x y v
## TRUE TRUE TRUE
# mapply(FUN, list1, list2, ...): multivariate apply; calls FUN on the first elements of every list, then the second elements, and so on.
# Sum of v over the whole table.
DT[,sum(v)]
## [1] 4999770
# Sum of v within each group of x (the result column is named V1).
DT[,sum(v), by = x]
## x V1
## 1: A 192271.8
## 2: B 192262.1
## 3: C 192290.6
## 4: D 191924.7
## 5: E 192456.3
## 6: F 192241.7
## 7: G 192407.5
## 8: H 191993.0
## 9: I 192347.6
## 10: J 192436.9
## 11: K 192246.0
## 12: L 192271.8
## 13: M 192280.8
## 14: N 192306.0
## 15: O 192495.9
## 16: P 192339.6
## 17: Q 192384.8
## 18: R 192320.7
## 19: S 192283.5
## 20: T 192227.0
## 21: U 192101.2
## 22: V 192604.5
## 23: W 192469.8
## 24: X 192297.1
## 25: Y 192265.6
## 26: Z 192243.4
## x V1
# Time the base-R grouped sum; the result is stored in tt and the timing in ttt.
ttt <- system.time( tt <- tapply(DT$v, DT$x, sum))
# tapply(data, index, function): table apply; groups data by index, then applies the function to each group
ttt
## 사용자 시스템 elapsed
## 0.22 0.08 0.29
# Time the same grouped sum written as a data.table by-expression
# (result in ss, timing in sss).
sss <- system.time( ss <- DT[, sum(v), by = x])
sss
## 사용자 시스템 elapsed
## 0.23 0.00 0.03
# Show the current search path (attached packages and environments).
search()
## [1] ".GlobalEnv" "package:data.table" "package:plyr"
## [4] "package:sqldf" "package:RSQLite" "package:gsubfn"
## [7] "package:proto" "package:reshape" "package:stats"
## [10] "package:graphics" "package:grDevices" "package:utils"
## [13] "package:datasets" "package:methods" "Autoloads"
## [16] "package:base"
# Remove data.table from the search path.
detach(package:data.table)
# Delete every object in the global environment.
# NOTE(review): this also removes objects created by earlier sections, not only
# the large DF/DT above; consider rm() with an explicit list of names instead.
rm(list = ls())
# Load the built-in iris data set into the workspace.
data("iris")
# Default preview: the first six rows.
head(iris, n = 6L)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
# Longer preview: the first ten rows.
head(iris, n = 10L)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
## 7 4.6 3.4 1.4 0.3 setosa
## 8 5.0 3.4 1.5 0.2 setosa
## 9 4.4 2.9 1.4 0.2 setosa
## 10 4.9 3.1 1.5 0.1 setosa
# Compact structure: column types and the first few values of each column.
str(iris)
## 'data.frame': 150 obs. of 5 variables:
## $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
## $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
## $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
## $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
## $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
# Per-column summary: quartiles and mean for numeric columns, level counts for
# the Species factor.
summary(iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Min. :4.300 Min. :2.000 Min. :1.000 Min. :0.100
## 1st Qu.:5.100 1st Qu.:2.800 1st Qu.:1.600 1st Qu.:0.300
## Median :5.800 Median :3.000 Median :4.350 Median :1.300
## Mean :5.843 Mean :3.057 Mean :3.758 Mean :1.199
## 3rd Qu.:6.400 3rd Qu.:3.300 3rd Qu.:5.100 3rd Qu.:1.800
## Max. :7.900 Max. :4.400 Max. :6.900 Max. :2.500
## Species
## setosa :50
## versicolor:50
## virginica :50
##
##
##
# Covariance matrix of the four numeric measurement columns.
cov(x = iris[1:4])
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Sepal.Length 0.6856935 -0.0424340 1.2743154 0.5162707
## Sepal.Width -0.0424340 0.1899794 -0.3296564 -0.1216394
## Petal.Length 1.2743154 -0.3296564 3.1162779 1.2956094
## Petal.Width 0.5162707 -0.1216394 1.2956094 0.5810063
# Correlation matrix of the same four columns.
cor(x = iris[1:4])
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Sepal.Length 1.0000000 -0.1175698 0.8717538 0.8179411
## Sepal.Width -0.1175698 1.0000000 -0.4284401 -0.3661259
## Petal.Length 0.8717538 -0.4284401 1.0000000 0.9628654
## Petal.Width 0.8179411 -0.3661259 0.9628654 1.0000000
# is.na() flags each missing element of a vector.
y <- c(1, 2, 3, NA)
is.na(y)
## [1] FALSE FALSE FALSE TRUE
# A single NA makes the mean NA ...
x <- c(1, 2, NA, 3)
mean(x)
## [1] NA
# ... unless missing values are dropped first. TRUE is spelled out because T is
# an ordinary variable that can be reassigned.
mean(x, na.rm = TRUE)
## [1] 2
# Read kor.csv (looked up relative to the working directory); the first line
# of the file is used as the column names.
kor <- read.csv("kor.csv", header = TRUE)
# Preview the first six rows; Height is already NA in row 2.
head(kor)
## No Year Position Player Age Height
## 1 1 2002 GK 이운재 29 182
## 2 2 2002 GK 김병지 32 NA
## 3 3 2002 GK 최은성 31 184
## 4 4 2002 DF 현영민 23 178
## 5 5 2002 DF 최진철 31 187
## 6 6 2002 DF 김태영 32 180
# Column types and leading values.
str(kor)
## 'data.frame': 50 obs. of 6 variables:
## $ No : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Year : int 2002 2002 2002 2002 2002 2002 2002 2002 2002 2002 ...
## $ Position: chr "GK" "GK" "GK" "DF" ...
## $ Player : chr "이운재" "김병지" "최은성" "현영민" ...
## $ Age : int 29 32 31 23 31 32 25 29 22 33 ...
## $ Height : int 182 NA 184 178 187 180 177 182 181 181 ...
# Per-column summary, including the NA count of each numeric column.
summary(kor)
## No Year Position Player
## Min. : 1.00 Min. :2002 Length:50 Length:50
## 1st Qu.:13.25 1st Qu.:2002 Class :character Class :character
## Median :25.50 Median :2022 Mode :character Mode :character
## Mean :25.50 Mean :2013
## 3rd Qu.:37.75 3rd Qu.:2022
## Max. :50.00 Max. :2022
##
## Age Height
## Min. :21.00 Min. :173
## 1st Qu.:24.50 1st Qu.:177
## Median :28.00 Median :181
## Mean :27.34 Mean :181
## 3rd Qu.:30.00 3rd Qu.:186
## Max. :34.00 Max. :194
## NA's :3 NA's :3
# Total number of missing cells in kor.
sum(is.na(kor))
## [1] 6
# Keep only the rows that have no missing value in any column.
complete_kor <- kor[complete.cases(kor), , drop = FALSE]
# Confirm that no missing cells remain.
sum(is.na(complete_kor))
## [1] 0
# Attach the Amelia package (multiple imputation); Rcpp is loaded with it.
library(Amelia)
## 필요한 패키지를 로딩중입니다: Rcpp
## ##
## ## Amelia II: Multiple Imputation
## ## (Version 1.8.0, built: 2021-05-26)
## ## Copyright (C) 2005-2022 James Honaker, Gary King and Matthew Blackwell
## ## Refer to http://gking.harvard.edu/amelia/ for more information
## ##
# Show the search path; Amelia and Rcpp are now attached.
search()
## [1] ".GlobalEnv" "package:Amelia" "package:Rcpp"
## [4] "package:plyr" "package:sqldf" "package:RSQLite"
## [7] "package:gsubfn" "package:proto" "package:reshape"
## [10] "package:stats" "package:graphics" "package:grDevices"
## [13] "package:utils" "package:datasets" "package:methods"
## [16] "Autoloads" "package:base"
# Load the freetrade example data set and preview its first six rows.
data("freetrade")
head(freetrade)
## year country tariff polity pop gdp.pc intresmi signed fiveop
## 1 1981 SriLanka NA 6 14988000 461.0236 1.937347 0 12.4
## 2 1982 SriLanka NA 5 15189000 473.7634 1.964430 0 12.5
## 3 1983 SriLanka 41.3 5 15417000 489.2266 1.663936 1 12.3
## 4 1984 SriLanka NA 5 15599000 508.1739 2.797462 0 12.3
## 5 1985 SriLanka 31.0 5 15837000 525.5609 2.259116 0 12.3
## 6 1986 SriLanka NA 5 16117000 538.9237 1.832549 0 12.5
## usheg
## 1 0.2593112
## 2 0.2558008
## 3 0.2655022
## 4 0.2988009
## 5 0.2952431
## 6 0.2886563
# Column types of freetrade.
str(freetrade)
## 'data.frame': 171 obs. of 10 variables:
## $ year : int 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 ...
## $ country : chr "SriLanka" "SriLanka" "SriLanka" "SriLanka" ...
## $ tariff : num NA NA 41.3 NA 31 ...
## $ polity : int 6 5 5 5 5 5 5 5 5 5 ...
## $ pop : num 14988000 15189000 15417000 15599000 15837000 ...
## $ gdp.pc : num 461 474 489 508 526 ...
## $ intresmi: num 1.94 1.96 1.66 2.8 2.26 ...
## $ signed : int 0 0 1 0 0 0 0 1 0 0 ...
## $ fiveop : num 12.4 12.5 12.3 12.3 12.3 ...
## $ usheg : num 0.259 0.256 0.266 0.299 0.295 ...
# Total number of missing cells before imputation.
sum(is.na(freetrade))
## [1] 94
# Run multiple imputation producing m = 5 completed data sets, with year passed
# as the time-series variable and country as the cross-section variable.
# NOTE(review): no set.seed() call is visible before this line, so the imputed
# values and chain lengths probably differ between runs - confirm.
a.out <- amelia(freetrade, m = 5, ts = "year", cs = "country")
## -- Imputation 1 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
##
## -- Imputation 2 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
##
## -- Imputation 3 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14
##
## -- Imputation 4 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
##
## -- Imputation 5 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
# Print the imputation summary: number of imputed data sets, return code and
# message, and the chain length of each imputation.
a.out
##
## Amelia output with 5 imputed datasets.
## Return code: 1
## Message: Normal EM convergence.
##
## Chain Lengths:
## --------------
## Imputation 1: 16
## Imputation 2: 16
## Imputation 3: 14
## Imputation 4: 15
## Imputation 5: 17
# Histogram of tariff in the third completed data set.
hist(a.out$imputations[[3]]$tariff, col = "grey", border = "white")
# Persist the whole amelia result object for later sessions.
save(a.out, file = "imputations.RData")
# Export the imputed data sets to files whose names start with "outdata".
write.amelia(obj = a.out, file.stem = "outdata")
# Missingness map of the data that went into the imputation.
missmap(a.out)
# Replace tariff with the values from the fifth completed data set. The list
# element is spelled out in full ("imputations"); the shorter "imputation" only
# worked through partial matching of `$` on lists.
freetrade$tariff <- a.out$imputations[[5]]$tariff
# Missingness map after the replacement.
missmap(freetrade)
# Only tariff was replaced, so missing cells in the other columns remain.
sum(is.na(freetrade))
## [1] 36
이상값(outlier)의 정의
> BAD DATA
>> a1 : 의도하지 않게 잘못 입력한 경우(human error)
>> a2 : 의도하지 않게 입력됐으나 분석 목적에 부합하지 않아 제거해야 하는 경우
> OUTLIER
>> a3 : 의도되지 않은 현상이지만 분석에 포함해야 하는 경우
>> b1 : 의도된 이상값 (ex. 사기)
# Draw 100 standard-normal values and plot them.
# NOTE(review): no seed is set in this section, so the sample (and any points
# the boxplot flags in it) changes from run to run.
x <- rnorm(100)
boxplot(x)
# Append three extreme values and keep the boxplot statistics.
x <- c(x, 19, 28, 30)
outwith <- boxplot(x)
# Points the boxplot flagged as outliers.
outwith$out
## [1] 19 28 30
# Attach the outliers package and show the updated search path.
library(outliers)
search()
## [1] ".GlobalEnv" "package:outliers" "package:Amelia"
## [4] "package:Rcpp" "package:plyr" "package:sqldf"
## [7] "package:RSQLite" "package:gsubfn" "package:proto"
## [10] "package:reshape" "package:stats" "package:graphics"
## [13] "package:grDevices" "package:utils" "package:datasets"
## [16] "package:methods" "Autoloads" "package:base"
# Fix the seed so the sample below is reproducible.
set.seed(1234)
y <- rnorm(100)
# Most extreme value of the sample.
outlier(y)
## [1] 2.548991
# opposite = TRUE reports the extreme value at the other end instead.
outlier(y, opposite = TRUE)
## [1] -2.345698
# Reshape the sample into a 20 x 5 matrix; outlier() then reports one value
# per column.
dim(y) <- c(20, 5)
outlier(y)
## [1] 2.415835 1.102298 1.647817 2.548991 2.121117
outlier(y, opposite = TRUE)
## [1] -2.345698 -2.180040 -1.806031 -1.390701 -1.372302
# One box per matrix column.
boxplot(y)