# (2) Training vs. Validation vs. Test Split
library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
# Load the built-in iris data set.
data(iris)
# Number of rows in iris; the partition below samples from these rows.
nrow(iris)
## [1] 150
# Fix the RNG seed so the random partition is reproducible between runs.
# NOTE: the index listing recorded below was produced by an unseeded run, so
# the exact row numbers printed after this change will differ from it.
set.seed(123)
# Draw a 60% training sample based on Species. With list = FALSE the row
# indices come back as a one-column matrix. The partition used to be drawn
# twice with identical arguments, discarding the first draw; one call is kept.
idx <- createDataPartition(iris$Species, p = 0.6, list = FALSE)
idx
## Resample1
## [1,] 1
## [2,] 2
## [3,] 3
## [4,] 5
## [5,] 8
## [6,] 9
## [7,] 10
## [8,] 12
## [9,] 14
## [10,] 15
## [11,] 16
## [12,] 19
## [13,] 20
## [14,] 21
## [15,] 22
## [16,] 23
## [17,] 24
## [18,] 26
## [19,] 28
## [20,] 29
## [21,] 30
## [22,] 31
## [23,] 32
## [24,] 34
## [25,] 38
## [26,] 40
## [27,] 42
## [28,] 45
## [29,] 46
## [30,] 49
## [31,] 52
## [32,] 54
## [33,] 58
## [34,] 59
## [35,] 60
## [36,] 64
## [37,] 66
## [38,] 67
## [39,] 69
## [40,] 70
## [41,] 71
## [42,] 72
## [43,] 74
## [44,] 75
## [45,] 77
## [46,] 78
## [47,] 79
## [48,] 80
## [49,] 81
## [50,] 82
## [51,] 83
## [52,] 85
## [53,] 86
## [54,] 87
## [55,] 88
## [56,] 90
## [57,] 91
## [58,] 93
## [59,] 99
## [60,] 100
## [61,] 101
## [62,] 107
## [63,] 108
## [64,] 109
## [65,] 110
## [66,] 111
## [67,] 112
## [68,] 113
## [69,] 115
## [70,] 118
## [71,] 119
## [72,] 120
## [73,] 121
## [74,] 122
## [75,] 123
## [76,] 124
## [77,] 126
## [78,] 127
## [79,] 129
## [80,] 130
## [81,] 132
## [82,] 133
## [83,] 134
## [84,] 135
## [85,] 137
## [86,] 140
## [87,] 142
## [88,] 143
## [89,] 144
## [90,] 145
# Rows selected by the partition index form the training set; every
# remaining row goes to the test set. idx is a one-column matrix, so its
# single column is pulled out explicitly before indexing.
train <- iris[idx[, 1], ]
test <- iris[-idx[, 1], ]
# round(): round a number to the given number of decimal places.
round(0.7811159, digits = 2)
## [1] 0.78
# Data preprocessing
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
# Load the diamonds data set and preview its first rows.
data("diamonds")
head(diamonds)
## # A tibble: 6 × 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
## 4 0.29 Premium I VS2 62.4 58 334 4.2 4.23 2.63
## 5 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75
## 6 0.24 Very Good J VVS2 62.8 57 336 3.94 3.96 2.48
library(dplyr)
library(ggplot2)
# Reload diamonds, then rename two columns: clarity -> c and price -> p.
data("diamonds")
diamonds1 <- rename(diamonds, c = clarity, p = price)
# Show the first three rows to confirm the new column names.
diamonds1 %>% head(n = 3)
## # A tibble: 3 × 10
## carat cut color c depth table p x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
# Tally the number of rows for each level of cut.
diamonds %>% count(cut)
## # A tibble: 5 × 2
## cut n
## <ord> <int>
## 1 Fair 1610
## 2 Good 4906
## 3 Very Good 12082
## 4 Premium 13791
## 5 Ideal 21551
# Compact column-wise overview: roughly 50,000 rows and 10 variables.
dplyr::glimpse(diamonds)
## Rows: 53,940
## Columns: 10
## $ carat <dbl> 0.23, 0.21, 0.23, 0.29, 0.31, 0.24, 0.24, 0.26, 0.22, 0.23, 0.…
## $ cut <ord> Ideal, Premium, Good, Premium, Good, Very Good, Very Good, Ver…
## $ color <ord> E, E, E, I, J, J, I, H, E, H, J, J, F, J, E, E, I, J, J, J, I,…
## $ clarity <ord> SI2, SI1, VS1, VS2, SI2, VVS2, VVS1, SI1, VS2, VS1, SI1, VS1, …
## $ depth <dbl> 61.5, 59.8, 56.9, 62.4, 63.3, 62.8, 62.3, 61.9, 65.1, 59.4, 64…
## $ table <dbl> 55, 61, 65, 58, 58, 57, 57, 55, 61, 61, 55, 56, 61, 54, 62, 58…
## $ price <int> 326, 326, 327, 334, 335, 336, 336, 337, 337, 338, 339, 340, 34…
## $ x <dbl> 3.95, 3.89, 4.05, 4.20, 4.34, 3.94, 3.95, 4.07, 3.87, 4.00, 4.…
## $ y <dbl> 3.98, 3.84, 4.07, 4.23, 4.35, 3.96, 3.98, 4.11, 3.78, 4.05, 4.…
## $ z <dbl> 2.43, 2.31, 2.31, 2.63, 2.75, 2.48, 2.47, 2.53, 2.49, 2.39, 2.…
library(dplyr)
library(ggplot2)
# Keep only the carat and price columns.
df1 <- select(diamonds, carat, price)
# Preview the first three rows of the reduced table.
df1 %>% head(n = 3)
## # A tibble: 3 × 2
## carat price
## <dbl> <int>
## 1 0.23 326
## 2 0.21 326
## 3 0.23 327
# First three rows whose cut is "Good".
head(filter(diamonds, cut == "Good"), n = 3)
## # A tibble: 3 × 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
## 2 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75
## 3 0.3 Good J SI1 64 55 339 4.25 4.28 2.73
# Add Ratio (price per carat) and Double (twice Ratio); Double can refer to
# Ratio because mutate() evaluates its arguments in order.
diamonds %>%
  mutate(
    Ratio = price / carat,
    Double = Ratio * 2
  ) %>%
  head(n = 3)
## # A tibble: 3 × 12
## carat cut color clarity depth table price x y z Ratio Double
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43 1417. 2835.
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31 1552. 3105.
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31 1422. 2843.