# Start from a clean workspace: remove every object visible from here.
rm(list = ls())
# List the names of all objects in the workspace (none remain after the reset).
ls()
## character(0)
# Show the current working directory.
getwd()
## [1] "C:/data"
library(dplyr)
## Warning: 패키지 'dplyr'는 R 버전 4.2.2에서 작성되었습니다
##
## 다음의 패키지를 부착합니다: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(caret)
## Warning: 패키지 'caret'는 R 버전 4.2.2에서 작성되었습니다
## 필요한 패키지를 로딩중입니다: ggplot2
## 필요한 패키지를 로딩중입니다: lattice
library(recipes)
## Warning: 패키지 'recipes'는 R 버전 4.2.2에서 작성되었습니다
##
## 다음의 패키지를 부착합니다: 'recipes'
## The following object is masked from 'package:stats':
##
## step
library(ggplot2)
library(mlbench)
## Warning: 패키지 'mlbench'는 R 버전 4.2.2에서 작성되었습니다
# Load the Glass data set and print a compact column-by-column overview.
data("Glass")
dplyr::glimpse(Glass)
## Rows: 214
## Columns: 10
## $ RI <dbl> 1.52101, 1.51761, 1.51618, 1.51766, 1.51742, 1.51596, 1.51743, 1.…
## $ Na <dbl> 13.64, 13.89, 13.53, 13.21, 13.27, 12.79, 13.30, 13.15, 14.04, 13…
## $ Mg <dbl> 4.49, 3.60, 3.55, 3.69, 3.62, 3.61, 3.60, 3.61, 3.58, 3.60, 3.46,…
## $ Al <dbl> 1.10, 1.36, 1.54, 1.29, 1.24, 1.62, 1.14, 1.05, 1.37, 1.36, 1.56,…
## $ Si <dbl> 71.78, 72.73, 72.99, 72.61, 73.08, 72.97, 73.09, 73.24, 72.08, 72…
## $ K <dbl> 0.06, 0.48, 0.39, 0.57, 0.55, 0.64, 0.58, 0.57, 0.56, 0.57, 0.67,…
## $ Ca <dbl> 8.75, 7.83, 7.78, 8.22, 8.07, 8.07, 8.17, 8.24, 8.30, 8.40, 8.09,…
## $ Ba <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ Fe <dbl> 0.00, 0.00, 0.00, 0.00, 0.00, 0.26, 0.00, 0.00, 0.00, 0.11, 0.24,…
## $ Type <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
# Number of observations per glass type (the response).
table(Glass[["Type"]])
##
## 1 2 3 5 6 7
## 70 76 17 13 9 29
# Scatter-plot matrix of all ten columns, Type included.
pairs(Glass)

# Scatter-plot matrix of the predictors only (column 10, Type, is dropped).
pairs(Glass[, -10])

# Pairwise correlations among the predictors.
cor(Glass[-10])
## RI Na Mg Al Si K
## RI 1.0000000000 -0.19188538 -0.122274039 -0.40732603 -0.54205220 -0.289832711
## Na -0.1918853790 1.00000000 -0.273731961 0.15679367 -0.06980881 -0.266086504
## Mg -0.1222740393 -0.27373196 1.000000000 -0.48179851 -0.16592672 0.005395667
## Al -0.4073260341 0.15679367 -0.481798509 1.00000000 -0.00552372 0.325958446
## Si -0.5420521997 -0.06980881 -0.165926723 -0.00552372 1.00000000 -0.193330854
## K -0.2898327111 -0.26608650 0.005395667 0.32595845 -0.19333085 1.000000000
## Ca 0.8104026963 -0.27544249 -0.443750026 -0.25959201 -0.20873215 -0.317836155
## Ba -0.0003860189 0.32660288 -0.492262118 0.47940390 -0.10215131 -0.042618059
## Fe 0.1430096093 -0.24134641 0.083059529 -0.07440215 -0.09420073 -0.007719049
## Ca Ba Fe
## RI 0.8104027 -0.0003860189 0.143009609
## Na -0.2754425 0.3266028795 -0.241346411
## Mg -0.4437500 -0.4922621178 0.083059529
## Al -0.2595920 0.4794039017 -0.074402151
## Si -0.2087322 -0.1021513105 -0.094200731
## K -0.3178362 -0.0426180594 -0.007719049
## Ca 1.0000000 -0.1128409671 0.124968219
## Ba -0.1128410 1.0000000000 -0.058691755
## Fe 0.1249682 -0.0586917554 1.000000000
# Two panels side by side: Mg and Al content broken down by glass type.
par(mfrow = c(1L, 2L))
boxplot(Glass$Mg ~ Glass$Type)
boxplot(Glass$Al ~ Glass$Type)

library(corrplot)
## Warning: 패키지 'corrplot'는 R 버전 4.2.2에서 작성되었습니다
## corrplot 0.92 loaded
# Correlation plot of the predictors; order = "hclust" reorders the variables
# by hierarchical clustering.
# Only one figure is drawn here, so switch to a single-panel layout: with the
# 1 x 2 layout carried over from the box plots, the plot was squeezed into
# half of the graphics device.
par(mfrow = c(1, 1))
corrplot(cor(Glass[, -10]), order = "hclust")
# Skewness section.
# Warm-up: apply a function column by column (means of the iris measurements).
vapply(iris[1:4], mean, numeric(1))
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## 5.843333 3.057333 3.758000 1.199333
library(e1071)
## Warning: 패키지 'e1071'는 R 버전 4.2.2에서 작성되었습니다
# Sample skewness of every predictor (one value per column).
vapply(Glass[, -10], skewness, numeric(1))
## RI Na Mg Al Si K Ca
## 1.6027151 0.4478343 -1.1364523 0.8946104 -0.7202392 6.4600889 2.0184463
## Ba Fe
## 3.3686800 1.7298107
# Estimate a pre-processing recipe on the predictors: Box-Cox transformation,
# then centering (subtract the mean) and scaling (divide by the standard
# deviation). Note: "scale" standardizes the spread; it does not map values
# into the [0, 1] interval.
trans <- preProcess(
  Glass[, -10],
  method = c("BoxCox", "center", "scale")
)
trans
## Created from 214 samples and 9 variables
##
## Pre-processing:
## - Box-Cox transformation (5)
## - centered (9)
## - ignored (0)
## - scaled (9)
##
## Lambda estimates for Box-Cox transformation:
## -2, -0.1, 0.5, 2, -1.1
# Apply the estimated recipe to the predictors and preview five columns.
transformed <- predict(trans, newdata = Glass[, -10])
head(transformed[, seq_len(5)])
## RI Na Mg Al Si
## 1 0.8756898 0.3133883 1.2517037 -0.65520274 -1.12729016
## 2 -0.2471367 0.6129977 0.6346799 -0.08726137 0.09719851
## 3 -0.7216425 0.1798164 0.6000157 0.27454124 0.43512776
## 4 -0.2305698 -0.2150217 0.6970756 -0.23439154 -0.05836211
## 5 -0.3101056 -0.1402661 0.6485456 -0.34194384 0.55238422
## 6 -0.7947626 -0.7480171 0.6416128 0.42852325 0.40909039
# Same recipe with a PCA step appended; the transformed data then holds
# principal-component scores (PC1, PC2, ...) instead of the original columns.
trans1 <- preProcess(
  Glass[, -10],
  method = c("BoxCox", "center", "scale", "pca")
)
transformed1 <- predict(trans1, newdata = Glass[, -10])
head(transformed1[, seq_len(5)])
## PC1 PC2 PC3 PC4 PC5
## 1 -1.2126444 -0.3942139 -0.1730756 -1.7193852 0.1913387
## 2 0.6179073 -0.7020476 -0.5507034 -0.8575350 0.1566312
## 3 0.9907027 -0.8876886 -0.6452946 -0.3027716 0.1363025
## 4 0.1510212 -0.9042336 -0.1622361 -0.4521567 0.4291846
## 5 0.3582849 -1.0160965 -0.5763959 -0.1667831 0.3634192
## 6 0.3408017 -1.3565637 0.7451275 1.0568333 -1.7762845
# Missing-value demo on the Boston housing data (seeded for reproducibility).
set.seed(777)
library(mlbench)
data(BostonHousing)
# Set `crim` to NA in 10 randomly selected rows.
BostonHousing$crim[sample(nrow(BostonHousing), 10)] <- NA
# Two successive draws of 6 values from 1..46; each call advances the RNG.
sample(46, 6)
## [1] 9 16 4 44 26 23
sample(46, 6)
## [1] 22 12 39 8 10 14
# Number of missing values in each column.
colSums(is.na(BostonHousing))
## crim zn indus chas nox rm age dis rad tax
## 10 0 0 0 0 0 0 0 0 0
## ptratio b lstat medv
## 0 0 0 0
# Response: median home value. Predictors: the first five columns.
Y <- BostonHousing[["medv"]]
X <- BostonHousing[, c("crim", "zn", "indus", "chas", "nox")]
# Random forest with median imputation of missing values as pre-processing.
model <- caret::train(
  x = X,
  y = Y,
  method = "rf",
  preProcess = "medianImpute"
)
model
## Random Forest
##
## 506 samples
## 5 predictor
##
## Pre-processing: median imputation (4), ignore (1)
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 506, 506, 506, 506, 506, 506, ...
## Resampling results across tuning parameters:
##
## mtry RMSE Rsquared MAE
## 2 6.188803 0.5601644 4.187659
## 3 6.295054 0.5488742 4.244309
## 5 6.584417 0.5148418 4.427385
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was mtry = 2.
set.seed(777)
# Reload a fresh copy of the data, then treat every crime rate above 0.5
# as missing.
data("BostonHousing")
BostonHousing$crim[BostonHousing$crim > 0.5] <- NA
Y <- BostonHousing[["medv"]]
# Predictors: columns 1-3 and 5 (the factor column 4 is left out).
X <- BostonHousing[, c("crim", "zn", "indus", "nox")]
# Linear model (glm) with median imputation of the missing crime rates.
model_median <- caret::train(
  x = X,
  y = Y,
  method = "glm",
  preProcess = "medianImpute"
)
# Lowest resampled RMSE among the fitted candidates.
print(min(model_median[["results"]][["RMSE"]]))
## [1] 7.870132
# Clear the workspace before the data-splitting examples.
rm(list = ls())
library(AppliedPredictiveModeling)
## Warning: 패키지 'AppliedPredictiveModeling'는 R 버전 4.2.2에서 작성되었습니다
# twoClassData supplies `predictors` (a data frame) and `classes` (a factor).
data("twoClassData")
head(predictors)
## PredictorA PredictorB
## 1 0.1582 0.1609
## 2 0.6552 0.4918
## 3 0.7060 0.6333
## 4 0.1992 0.0881
## 5 0.3952 0.4152
## 6 0.4250 0.2988
# Structure of the predictor table.
dplyr::glimpse(predictors)
## Rows: 208
## Columns: 2
## $ PredictorA <dbl> 0.1582, 0.6552, 0.7060, 0.1992, 0.3952, 0.4250, 0.0658, 0.3…
## $ PredictorB <dbl> 0.1609, 0.4918, 0.6333, 0.0881, 0.4152, 0.2988, 0.1786, 0.2…
class(predictors)
## [1] "data.frame"
# Structure of the outcome factor.
dplyr::glimpse(classes)
## Factor w/ 2 levels "Class1","Class2": 2 2 2 2 2 2 2 2 2 2 ...
set.seed(1)
# Pick 80% of the rows for training (the remaining 20% is kept for
# evaluation). list = FALSE returns the row indices as a one-column matrix.
traingRows <- createDataPartition(y = classes, p = 0.8, list = FALSE)
head(traingRows)
## Resample1
## [1,] 1
## [2,] 2
## [3,] 3
## [4,] 7
## [5,] 8
## [6,] 9
# Predictors: selected rows go to training, all other rows to testing.
trainPredictors <- predictors[traingRows, ]
testPredictors <- predictors[-traingRows, ]
# Outcome classes, split with the same row indices.
trainClasses <- classes[traingRows]
testClasses <- classes[-traingRows]
str(trainPredictors)
## 'data.frame': 167 obs. of 2 variables:
## $ PredictorA: num 0.1582 0.6552 0.706 0.0658 0.3086 ...
## $ PredictorB: num 0.161 0.492 0.633 0.179 0.28 ...
str(testPredictors)
## 'data.frame': 41 obs. of 2 variables:
## $ PredictorA: num 0.1992 0.3952 0.425 0.0847 0.2909 ...
## $ PredictorB: num 0.0881 0.4152 0.2988 0.0548 0.3021 ...
# Three independent 80% training splits of the training classes,
# returned as a list of index vectors.
repeatedSplit <- createDataPartition(y = trainClasses, times = 3, p = 0.8)
str(repeatedSplit)
## List of 3
## $ Resample1: int [1:135] 1 2 3 4 5 6 8 9 10 11 ...
## $ Resample2: int [1:135] 1 2 3 4 5 6 7 8 9 11 ...
## $ Resample3: int [1:135] 1 2 3 4 5 6 9 10 11 14 ...
# k-fold cross-validation: 10 folds; returnTrain = TRUE makes each list
# element hold the training-row indices of its fold.
set.seed(1)
cvSplit <- createFolds(y = trainClasses, k = 10, returnTrain = TRUE)
str(cvSplit)
## List of 10
## $ Fold01: int [1:150] 1 2 4 5 6 7 8 10 11 13 ...
## $ Fold02: int [1:150] 1 2 3 4 6 7 8 9 10 11 ...
## $ Fold03: int [1:150] 1 3 4 5 6 7 8 9 10 11 ...
## $ Fold04: int [1:150] 1 2 3 4 5 6 7 8 9 10 ...
## $ Fold05: int [1:150] 2 3 4 5 6 7 8 9 10 11 ...
## $ Fold06: int [1:150] 1 2 3 4 5 6 7 8 9 11 ...
## $ Fold07: int [1:150] 1 2 3 4 5 6 7 9 10 12 ...
## $ Fold08: int [1:151] 1 2 3 4 5 6 8 9 10 11 ...
## $ Fold09: int [1:151] 1 2 3 5 6 7 8 9 10 11 ...
## $ Fold10: int [1:151] 1 2 3 4 5 7 8 9 10 11 ...
# Training-row indices of the first fold.
fold1 <- cvSplit[["Fold01"]]
length(fold1)
## [1] 150
# Subset predictors and classes to the rows of that fold.
cvPredictors1 <- trainPredictors[fold1, ]
cvClasses1 <- trainClasses[fold1]
# Compare the full training set size with the fold's training size.
nrow(trainPredictors)
## [1] 167
nrow(cvPredictors1)
## [1] 150
library(caret)
# Load the German credit data and list its columns.
data(GermanCredit)
dplyr::glimpse(GermanCredit)
## Rows: 1,000
## Columns: 62
## $ Duration <int> 6, 48, 12, 42, 24, 36, 24, 36, …
## $ Amount <int> 1169, 5951, 2096, 7882, 4870, 9…
## $ InstallmentRatePercentage <int> 4, 2, 2, 2, 3, 2, 3, 2, 2, 4, 3…
## $ ResidenceDuration <int> 4, 2, 3, 4, 4, 4, 4, 2, 4, 2, 1…
## $ Age <int> 67, 22, 49, 45, 53, 35, 53, 35,…
## $ NumberExistingCredits <int> 2, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1…
## $ NumberPeopleMaintenance <int> 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1…
## $ Telephone <dbl> 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1…
## $ ForeignWorker <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ Class <fct> Good, Bad, Good, Good, Bad, Goo…
## $ CheckingAccountStatus.lt.0 <dbl> 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0…
## $ CheckingAccountStatus.0.to.200 <dbl> 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1…
## $ CheckingAccountStatus.gt.200 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ CheckingAccountStatus.none <dbl> 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0…
## $ CreditHistory.NoCredit.AllPaid <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ CreditHistory.ThisBank.AllPaid <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ CreditHistory.PaidDuly <dbl> 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1…
## $ CreditHistory.Delay <dbl> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0…
## $ CreditHistory.Critical <dbl> 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0…
## $ Purpose.NewCar <dbl> 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1…
## $ Purpose.UsedCar <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0…
## $ Purpose.Furniture.Equipment <dbl> 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0…
## $ Purpose.Radio.Television <dbl> 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0…
## $ Purpose.DomesticAppliance <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Purpose.Repairs <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Purpose.Education <dbl> 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0…
## $ Purpose.Vacation <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Purpose.Retraining <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Purpose.Business <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Purpose.Other <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ SavingsAccountBonds.lt.100 <dbl> 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1…
## $ SavingsAccountBonds.100.to.500 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ SavingsAccountBonds.500.to.1000 <dbl> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0…
## $ SavingsAccountBonds.gt.1000 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0…
## $ SavingsAccountBonds.Unknown <dbl> 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0…
## $ EmploymentDuration.lt.1 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ EmploymentDuration.1.to.4 <dbl> 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0…
## $ EmploymentDuration.4.to.7 <dbl> 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0…
## $ EmploymentDuration.gt.7 <dbl> 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0…
## $ EmploymentDuration.Unemployed <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0…
## $ Personal.Male.Divorced.Seperated <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0…
## $ Personal.Female.NotSingle <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ Personal.Male.Single <dbl> 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0…
## $ Personal.Male.Married.Widowed <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0…
## $ Personal.Female.Single <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ OtherDebtorsGuarantors.None <dbl> 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1…
## $ OtherDebtorsGuarantors.CoApplicant <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ OtherDebtorsGuarantors.Guarantor <dbl> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0…
## $ Property.RealEstate <dbl> 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0…
## $ Property.Insurance <dbl> 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0…
## $ Property.CarOther <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1…
## $ Property.Unknown <dbl> 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0…
## $ OtherInstallmentPlans.Bank <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ OtherInstallmentPlans.Stores <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ OtherInstallmentPlans.None <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ Housing.Rent <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1…
## $ Housing.Own <dbl> 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0…
## $ Housing.ForFree <dbl> 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0…
## $ Job.UnemployedUnskilled <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Job.UnskilledResident <dbl> 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0…
## $ Job.SkilledEmployee <dbl> 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1…
## $ Job.Management.SelfEmp.HighlyQualified <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0…
set.seed(1056)
# Class balance of the outcome (Bad vs. Good credit).
table(GermanCredit[["Class"]])
##
## Bad Good
## 300 700
set.seed(1056)
# Drop near-zero-variance columns. The columns to keep are computed with
# setdiff() instead of negative indexing: `df[, -integer(0)]` selects no
# columns at all, so an empty nearZeroVar() result would wipe the data frame.
GermanCredit <- GermanCredit[
  ,
  setdiff(seq_along(GermanCredit), nearZeroVar(GermanCredit)),
  drop = FALSE
]
# Remove a further set of indicator columns by name.
# NOTE(review): presumably dropped to avoid linear dependencies among the
# dummy-coded groups - confirm.
GermanCredit <- GermanCredit[
  ,
  !(names(GermanCredit) %in% c(
    "CheckingAccountStatus.lt.0",
    "SavingsAccountBonds.lt.100",
    "EmploymentDuration.lt.1",
    "EmploymentDuration.Unemployed",
    "Personal.Male.Married.Widowed",
    "Property.Unknown",
    "Housing.ForFree"
  )),
  drop = FALSE
]
# 80/20 train/test split on the outcome. createDataPartition() returns a list
# by default; [[1]] extracts its single element as a plain index vector
# (list = FALSE would return a one-column matrix instead).
inTrain <- createDataPartition(GermanCredit$Class, p = 0.8)[[1]]
GermanCreditTrain <- GermanCredit[inTrain, ]
GermanCreditTest <- GermanCredit[-inTrain, ]
library(kernlab)
##
## 다음의 패키지를 부착합니다: 'kernlab'
## The following object is masked from 'package:ggplot2':
##
## alpha
# Radial-basis-kernel SVM on the credit data, tuned by 10-fold
# cross-validation.
# `repeats` was removed from trainControl(): with method = "cv" it is ignored
# and only triggers a warning. Use method = "repeatedcv" together with
# `repeats` if repeated cross-validation is actually wanted.
svmFit <- train(
  Class ~ .,
  data = GermanCreditTrain,
  method = "svmRadial",  # author's note: rf and knn are usually enough
  preProcess = c("center", "scale"),
  tuneLength = 10,       # evaluate 10 candidate tuning-parameter values
  trControl = trainControl(
    method = "cv",       # resampling scheme: k-fold cross-validation
    classProbs = TRUE    # option (not a function): enable class probabilities
  )
)
svmFit
## Support Vector Machines with Radial Basis Function Kernel
##
## 800 samples
## 41 predictor
## 2 classes: 'Bad', 'Good'
##
## Pre-processing: centered (41), scaled (41)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 720, 720, 720, 720, 720, 720, ...
## Resampling results across tuning parameters:
##
## C Accuracy Kappa
## 0.25 0.75625 0.3858886
## 0.50 0.76500 0.4087530
## 1.00 0.77000 0.3961280
## 2.00 0.76375 0.3782815
## 4.00 0.76500 0.3688453
## 8.00 0.77000 0.3892362
## 16.00 0.75125 0.3151667
## 32.00 0.73500 0.2797416
## 64.00 0.73250 0.2770492
## 128.00 0.72875 0.2713074
##
## Tuning parameter 'sigma' was held constant at a value of 0.0138183
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.0138183 and C = 1.
# Random forest on the credit data, tuned by 10-fold cross-validation over
# 10 candidate values of mtry.
# `repeats` was removed from trainControl(): with method = "cv" it is ignored
# and only triggers a warning. Use method = "repeatedcv" together with
# `repeats` if repeated cross-validation is actually wanted.
rfFit <- train(
  Class ~ .,
  data = GermanCreditTrain,
  method = "rf",
  preProcess = c("center", "scale"),
  tuneLength = 10,
  trControl = trainControl(
    method = "cv",
    classProbs = TRUE
  )
)
rfFit
## Random Forest
##
## 800 samples
## 41 predictor
## 2 classes: 'Bad', 'Good'
##
## Pre-processing: centered (41), scaled (41)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 720, 720, 720, 720, 720, 720, ...
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 2 0.73500 0.1888907
## 6 0.76375 0.3571717
## 10 0.77000 0.3815096
## 15 0.76500 0.3785229
## 19 0.76750 0.3918055
## 23 0.76500 0.3886457
## 28 0.76750 0.3954219
## 32 0.77000 0.4021965
## 36 0.76250 0.3870212
## 41 0.76625 0.3958864
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 10.
# k-nearest neighbours on centered and scaled predictors, tuned by 10-fold
# cross-validation over 10 candidate values of k.
# `repeats` was removed from trainControl(): with method = "cv" it is ignored
# and only triggers a warning. Use method = "repeatedcv" together with
# `repeats` if repeated cross-validation is actually wanted.
knnFit <- train(
  Class ~ .,
  data = GermanCreditTrain,
  method = "knn",
  preProcess = c("center", "scale"),
  tuneLength = 10,
  trControl = trainControl(
    method = "cv",
    classProbs = TRUE
  )
)
knnFit
## k-Nearest Neighbors
##
## 800 samples
## 41 predictor
## 2 classes: 'Bad', 'Good'
##
## Pre-processing: centered (41), scaled (41)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 720, 720, 720, 720, 720, 720, ...
## Resampling results across tuning parameters:
##
## k Accuracy Kappa
## 5 0.71250 0.2453952
## 7 0.71625 0.2389015
## 9 0.72250 0.2360241
## 11 0.72250 0.2345050
## 13 0.72875 0.2389910
## 15 0.73500 0.2446996
## 17 0.74875 0.2797479
## 19 0.73750 0.2285858
## 21 0.73375 0.2148876
## 23 0.73500 0.2137900
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was k = 17.
# k-nearest neighbours with a Box-Cox step added before centering and
# scaling; 5 candidate values of k, 10-fold cross-validation.
# `repeats` was removed from trainControl(): with method = "cv" it is ignored
# and only triggers a warning. Use method = "repeatedcv" together with
# `repeats` if repeated cross-validation is actually wanted.
knnFit1 <- train(
  Class ~ .,
  data = GermanCreditTrain,
  method = "knn",
  preProcess = c("BoxCox", "center", "scale"),
  tuneLength = 5,
  trControl = trainControl(
    method = "cv",
    classProbs = TRUE
  )
)
knnFit1
## k-Nearest Neighbors
##
## 800 samples
## 41 predictor
## 2 classes: 'Bad', 'Good'
##
## Pre-processing: Box-Cox transformation (6), centered (41), scaled (41)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 720, 720, 720, 720, 720, 720, ...
## Resampling results across tuning parameters:
##
## k Accuracy Kappa
## 5 0.71875 0.2556639
## 7 0.71875 0.2354375
## 9 0.71875 0.2324534
## 11 0.72000 0.2388210
## 13 0.73125 0.2545857
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was k = 13.
# Same Box-Cox + center + scale k-NN model, now with 10 candidate values
# of k; 10-fold cross-validation.
# `repeats` was removed from trainControl(): with method = "cv" it is ignored
# and only triggers a warning. Use method = "repeatedcv" together with
# `repeats` if repeated cross-validation is actually wanted.
knnFit2 <- train(
  Class ~ .,
  data = GermanCreditTrain,
  method = "knn",
  preProcess = c("BoxCox", "center", "scale"),
  tuneLength = 10,
  trControl = trainControl(
    method = "cv",
    classProbs = TRUE
  )
)
knnFit2
## k-Nearest Neighbors
##
## 800 samples
## 41 predictor
## 2 classes: 'Bad', 'Good'
##
## Pre-processing: Box-Cox transformation (6), centered (41), scaled (41)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 720, 720, 720, 720, 720, 720, ...
## Resampling results across tuning parameters:
##
## k Accuracy Kappa
## 5 0.72000 0.2741181
## 7 0.72875 0.2657760
## 9 0.71875 0.2270198
## 11 0.73000 0.2482702
## 13 0.73125 0.2533382
## 15 0.73625 0.2444686
## 17 0.73375 0.2360879
## 19 0.72750 0.2109112
## 21 0.73375 0.2235829
## 23 0.73500 0.2218212
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was k = 15.
# k-nearest neighbours on principal components: Box-Cox, centering and
# scaling followed by PCA; 5 candidate values of k, 10-fold cross-validation.
# `repeats` was removed from trainControl(): with method = "cv" it is ignored
# and only triggers a warning. Use method = "repeatedcv" together with
# `repeats` if repeated cross-validation is actually wanted.
knnFit3 <- train(
  Class ~ .,
  data = GermanCreditTrain,
  method = "knn",
  preProcess = c("BoxCox", "center", "scale", "pca"),
  tuneLength = 5,
  trControl = trainControl(
    method = "cv",
    classProbs = TRUE
  )
)
knnFit3
## k-Nearest Neighbors
##
## 800 samples
## 41 predictor
## 2 classes: 'Bad', 'Good'
##
## Pre-processing: Box-Cox transformation (6), centered (41), scaled
## (41), principal component signal extraction (41)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 720, 720, 720, 720, 720, 720, ...
## Resampling results across tuning parameters:
##
## k Accuracy Kappa
## 5 0.71000 0.2436294
## 7 0.71875 0.2511040
## 9 0.71375 0.2224824
## 11 0.72375 0.2446006
## 13 0.73375 0.2610622
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was k = 13.
# Resampled accuracy of the SVM against the cost parameter C, with the
# x-axis on a log2 scale.
# Fixed typo: the scale component is `log`, not `llog`. The misspelled name
# was rejected with an "Invalid or ambiguous component names" warning, so the
# log scale was never applied.
plot(svmFit, scales = list(x = list(log = 2)))
# Predicted classes for the held-out test set.
predicted <- predict(svmFit, newdata = GermanCreditTest)
# Class probabilities for the same rows. The argument is spelled out as
# `type`; the original `typ` only worked through partial argument matching.
predictedProbs <- predict(svmFit, newdata = GermanCreditTest, type = "prob")
head(predicted)
## [1] Good Good Bad Bad Good Good
## Levels: Bad Good
head(predictedProbs)
## Bad Good
## 1 0.4238216 0.5761784
## 2 0.0730835 0.9269165
## 3 0.5650184 0.4349816
## 4 0.6932819 0.3067181
## 5 0.1754360 0.8245640
## 6 0.3223266 0.6776734
