# Disabled example: split `iris` into training and evaluation sets with
# caret::createDataPartition(), partitioning on Species with p = 0.6.
#
# The example is kept as real comments instead of a bare multi-line string
# literal: a string at top level is still evaluated (and auto-printed) by R,
# so it is not actually a comment.
#
# To run it, install caret once from the console
# (install.packages("caret")) and uncomment the lines below.
#
# library(caret)
# nrow(iris)
# idx <- createDataPartition(iris$Species, p = 0.6, list = FALSE)
# train <- iris[idx, ]
# test <- iris[-idx, ]
# nrow(train)  # 60% for training
# nrow(test)   # 40% for evaluation
# Round to two decimal places.
round(0.7811159, digits = 2)
## [1] 0.78
#install.packages("dplyr")
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
# Load the `diamonds` dataset into the workspace.
data(diamonds)
# The pipe operator, shown here as a plain string.
"%>%"
## [1] "%>%"
# Preview the first six rows through the pipe.
diamonds %>%
  head(n = 6)
## # A tibble: 6 × 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
## 4 0.29 Premium I VS2 62.4 58 334 4.2 4.23 2.63
## 5 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75
## 6 0.24 Very Good J VVS2 62.8 57 336 3.94 3.96 2.48
# Rename variables on the first six rows: clarity -> c, price -> p.
diamonds1 <- diamonds %>%
  head() %>%
  rename(c = clarity, p = price)
head(diamonds1, n = 3)
## # A tibble: 3 × 10
## carat cut color c depth table p x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
# Frequency analysis: number of rows per `cut` level.
diamonds %>%
  count(cut)
## # A tibble: 5 × 2
## cut n
## <ord> <int>
## 1 Fair 1610
## 2 Good 4906
## 3 Very Good 12082
## 4 Premium 13791
## 5 Ideal 21551
# Extract only the columns that are needed.
df1 <- select(diamonds, carat, price)
df1 %>%
  head(n = 3)
## # A tibble: 3 × 2
## carat price
## <dbl> <int>
## 1 0.23 326
## 2 0.21 326
## 3 0.23 327
# Extract the rows whose value equals the comparison value (cut is "Good").
diamonds %>%
  filter(cut == "Good") %>%
  head(n = 3)
## # A tibble: 3 × 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
## 2 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75
## 3 0.3 Good J SI1 64 55 339 4.25 4.28 2.73
# Create derived variables; `Double` is built from the `Ratio` column
# defined in the same mutate() call.
diamonds %>%
  mutate(
    Ratio = price / carat,
    Double = Ratio * 2
  ) %>%
  head(n = 3)
## # A tibble: 3 × 12
## carat cut color clarity depth table price x y z Ratio Double
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43 1417. 2835.
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31 1552. 3105.
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31 1422. 2843.