library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(AppliedPredictiveModeling)
data(twoClassData)
head(predictors)
## PredictorA PredictorB
## 1 0.1582 0.1609
## 2 0.6552 0.4918
## 3 0.7060 0.6333
## 4 0.1992 0.0881
## 5 0.3952 0.4152
## 6 0.4250 0.2988
class(predictors)
## [1] "data.frame"
glimpse(predictors)
## Rows: 208
## Columns: 2
## $ PredictorA <dbl> 0.1582, 0.6552, 0.7060, 0.1992, 0.3952, 0.4250, 0.0658, 0.3…
## $ PredictorB <dbl> 0.1609, 0.4918, 0.6333, 0.0881, 0.4152, 0.2988, 0.1786, 0.2…
set.seed(1)
trainingRows<-createDataPartition(classes,p=.8,list=FALSE)
trainPredictors<-predictors[trainingRows,]
trainClasses<-classes[trainingRows]
testPredictors<-predictors[-trainingRows,]
testClasses<-classes[-trainingRows]
str(trainPredictors)
## 'data.frame': 167 obs. of 2 variables:
## $ PredictorA: num 0.1582 0.6552 0.706 0.0658 0.3086 ...
## $ PredictorB: num 0.161 0.492 0.633 0.179 0.28 ...
str(testPredictors)
## 'data.frame': 41 obs. of 2 variables:
## $ PredictorA: num 0.1992 0.3952 0.425 0.0847 0.2909 ...
## $ PredictorB: num 0.0881 0.4152 0.2988 0.0548 0.3021 ...
repeatedSplit<-createDataPartition(trainClasses,p=.8,times=3)
str(repeatedSplit)
## List of 3
## $ Resample1: int [1:135] 1 2 3 4 5 6 8 9 10 11 ...
## $ Resample2: int [1:135] 1 2 3 4 5 6 7 8 9 11 ...
## $ Resample3: int [1:135] 1 2 3 4 5 6 9 10 11 14 ...
set.seed(1)
cvSplit<-createFolds(trainClasses,k=10,returnTrain=TRUE)
str(cvSplit)
## List of 10
## $ Fold01: int [1:150] 1 2 4 5 6 7 8 10 11 13 ...
## $ Fold02: int [1:150] 1 2 3 4 6 7 8 9 10 11 ...
## $ Fold03: int [1:150] 1 3 4 5 6 7 8 9 10 11 ...
## $ Fold04: int [1:150] 1 2 3 4 5 6 7 8 9 10 ...
## $ Fold05: int [1:150] 2 3 4 5 6 7 8 9 10 11 ...
## $ Fold06: int [1:150] 1 2 3 4 5 6 7 8 9 11 ...
## $ Fold07: int [1:150] 1 2 3 4 5 6 7 9 10 12 ...
## $ Fold08: int [1:151] 1 2 3 4 5 6 8 9 10 11 ...
## $ Fold09: int [1:151] 1 2 3 5 6 7 8 9 10 11 ...
## $ Fold10: int [1:151] 1 2 3 4 5 7 8 9 10 11 ...
fold1<-cvSplit[[1]]
length(fold1)
## [1] 150
cvPredictors1<-trainPredictors[fold1,]
cvClasses<-trainClasses[fold1]
nrow(trainPredictors)
## [1] 167
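The index vectors in cvSplit can also drive a manual resampling loop. The sketch below is not part of the original output and uses caret's knn3() purely as an illustrative classifier; any model could be substituted.
# Minimal manual 10-fold CV over cvSplit; knn3() is used only for illustration
cvAccuracy <- sapply(cvSplit, function(idx) {
  fit  <- knn3(trainPredictors[idx, ], trainClasses[idx], k = 5)
  pred <- predict(fit, trainPredictors[-idx, ], type = "class")
  mean(pred == trainClasses[-idx])   # accuracy on the held-out rows of this fold
})
mean(cvAccuracy)                     # average held-out accuracy across the 10 folds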
# Determining the tuning parameters
data("GermanCredit")
glimpse(GermanCredit)
## Rows: 1,000
## Columns: 62
## $ Duration <int> 6, 48, 12, 42, 24, 36, 24, 36, …
## $ Amount <int> 1169, 5951, 2096, 7882, 4870, 9…
## $ InstallmentRatePercentage <int> 4, 2, 2, 2, 3, 2, 3, 2, 2, 4, 3…
## $ ResidenceDuration <int> 4, 2, 3, 4, 4, 4, 4, 2, 4, 2, 1…
## $ Age <int> 67, 22, 49, 45, 53, 35, 53, 35,…
## $ NumberExistingCredits <int> 2, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1…
## $ NumberPeopleMaintenance <int> 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1…
## $ Telephone <dbl> 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1…
## $ ForeignWorker <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ Class <fct> Good, Bad, Good, Good, Bad, Goo…
## $ CheckingAccountStatus.lt.0 <dbl> 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0…
## $ CheckingAccountStatus.0.to.200 <dbl> 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1…
## $ CheckingAccountStatus.gt.200 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ CheckingAccountStatus.none <dbl> 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0…
## $ CreditHistory.NoCredit.AllPaid <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ CreditHistory.ThisBank.AllPaid <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ CreditHistory.PaidDuly <dbl> 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1…
## $ CreditHistory.Delay <dbl> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0…
## $ CreditHistory.Critical <dbl> 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0…
## $ Purpose.NewCar <dbl> 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1…
## $ Purpose.UsedCar <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0…
## $ Purpose.Furniture.Equipment <dbl> 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0…
## $ Purpose.Radio.Television <dbl> 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0…
## $ Purpose.DomesticAppliance <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Purpose.Repairs <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Purpose.Education <dbl> 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0…
## $ Purpose.Vacation <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Purpose.Retraining <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Purpose.Business <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Purpose.Other <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ SavingsAccountBonds.lt.100 <dbl> 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1…
## $ SavingsAccountBonds.100.to.500 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ SavingsAccountBonds.500.to.1000 <dbl> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0…
## $ SavingsAccountBonds.gt.1000 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0…
## $ SavingsAccountBonds.Unknown <dbl> 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0…
## $ EmploymentDuration.lt.1 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ EmploymentDuration.1.to.4 <dbl> 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0…
## $ EmploymentDuration.4.to.7 <dbl> 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0…
## $ EmploymentDuration.gt.7 <dbl> 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0…
## $ EmploymentDuration.Unemployed <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0…
## $ Personal.Male.Divorced.Seperated <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0…
## $ Personal.Female.NotSingle <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ Personal.Male.Single <dbl> 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0…
## $ Personal.Male.Married.Widowed <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0…
## $ Personal.Female.Single <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ OtherDebtorsGuarantors.None <dbl> 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1…
## $ OtherDebtorsGuarantors.CoApplicant <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ OtherDebtorsGuarantors.Guarantor <dbl> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0…
## $ Property.RealEstate <dbl> 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0…
## $ Property.Insurance <dbl> 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0…
## $ Property.CarOther <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1…
## $ Property.Unknown <dbl> 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0…
## $ OtherInstallmentPlans.Bank <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ OtherInstallmentPlans.Stores <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ OtherInstallmentPlans.None <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ Housing.Rent <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1…
## $ Housing.Own <dbl> 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0…
## $ Housing.ForFree <dbl> 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0…
## $ Job.UnemployedUnskilled <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Job.UnskilledResident <dbl> 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0…
## $ Job.SkilledEmployee <dbl> 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1…
## $ Job.Management.SelfEmp.HighlyQualified <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0…
set.seed(1056)
table(GermanCredit$Class)
##
## Bad Good
## 300 700
GermanCredit <- GermanCredit[, -nearZeroVar(GermanCredit)] # drop near-zero-variance predictors
# Drop dummy columns whose information is duplicated by the remaining indicators
GermanCredit$CheckingAccountStatus.lt.0 <- NULL
GermanCredit$SavingsAccountBonds.lt.100 <- NULL
GermanCredit$EmploymentDuration.lt.1 <- NULL
GermanCredit$EmploymentDuration.Unemployed <- NULL
GermanCredit$Personal.Male.Married.Widowed <- NULL
GermanCredit$Property.Unknown <- NULL
GermanCredit$Housing.ForFree <- NULL
inTrain <- createDataPartition(GermanCredit$Class, p = .8)[[1]] # list=FALSE could be used instead of taking [[1]]
GermanCreditTrain <- GermanCredit[ inTrain, ]
GermanCreditTest <- GermanCredit[-inTrain, ]
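As a quick sanity check (added here; no corresponding output in the original), createDataPartition() samples within each class, so the Good/Bad proportions in the training and test sets should closely match the 700/300 split of the full data:
round(prop.table(table(GermanCreditTrain$Class)), 3) # training-set class proportions
round(prop.table(table(GermanCreditTest$Class)), 3)  # test-set class proportions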
svmFit<-train(Class~.,
              data=GermanCreditTrain,
              method='svmRadial', # e.g. 'nnet' would fit a neural network instead
              preProc=c('center','scale','BoxCox'),
              tuneLength=10, # evaluate 10 candidate values of the cost parameter C
              trControl=trainControl(method="cv", # resampling method used to evaluate the model
                                     repeats=5, # ignored for method="cv"; use method="repeatedcv" for repeated CV
                                     classProbs = TRUE))
## Warning: `repeats` has no meaning for this resampling method.
svmFit
## Support Vector Machines with Radial Basis Function Kernel
##
## 800 samples
## 41 predictor
## 2 classes: 'Bad', 'Good'
##
## Pre-processing: centered (41), scaled (41), Box-Cox transformation (6)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 720, 720, 720, 720, 720, 720, ...
## Resampling results across tuning parameters:
##
## C Accuracy Kappa
## 0.25 0.76000 0.3910287
## 0.50 0.76375 0.4050255
## 1.00 0.76625 0.3885478
## 2.00 0.76625 0.3818034
## 4.00 0.76125 0.3594753
## 8.00 0.75375 0.3446185
## 16.00 0.75375 0.3236219
## 32.00 0.73375 0.2706737
## 64.00 0.73250 0.2763079
## 128.00 0.72250 0.2524035
##
## Tuning parameter 'sigma' was held constant at a value of 0.01367206
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.01367206 and C = 1.
plot(svmFit,scale=list(x=list(log=2))) # resampled accuracy vs. cost C, x-axis on a log2 scale

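The selected tuning values can also be pulled out of the fitted object directly rather than read off the printed summary:
svmFit$bestTune # sigma and C of the model chosen by resampling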
predicted<-predict(svmFit,GermanCreditTest)
predicted
## [1] Good Good Bad Bad Good Good Good Good Good Good Good Good Bad Bad Good
## [16] Good Good Good Good Good Good Good Good Good Good Good Bad Good Good Good
## [31] Good Good Good Bad Good Good Good Good Bad Good Good Good Good Bad Bad
## [46] Bad Bad Good Good Good Good Good Good Good Good Bad Good Good Good Good
## [61] Good Good Bad Good Good Good Bad Good Bad Good Good Good Good Bad Good
## [76] Good Good Good Good Good Good Good Good Good Good Good Good Good Good Good
## [91] Good Good Good Good Good Good Good Good Good Good Good Good Good Good Good
## [106] Good Good Good Bad Good Good Good Good Good Good Good Good Good Good Good
## [121] Good Good Good Good Good Good Good Bad Good Good Good Bad Good Good Good
## [136] Good Good Good Bad Bad Good Good Good Bad Good Good Bad Good Good Bad
## [151] Bad Good Good Good Bad Good Good Good Good Good Good Good Bad Good Bad
## [166] Good Good Bad Good Good Good Bad Good Bad Good Bad Good Good Bad Bad
## [181] Good Good Good Bad Good Good Good Good Good Bad Good Good Good Good Good
## [196] Good Bad Good Bad Bad
## Levels: Bad Good
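To summarize test-set performance, the predicted classes can be cross-tabulated against the observed classes with caret's confusionMatrix() (output not reproduced here):
# Accuracy, kappa, sensitivity, specificity, etc. on the hold-out set
confusionMatrix(data = predicted, reference = GermanCreditTest$Class)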
predictedProbs<-predict(svmFit,newdata=GermanCreditTest,
                        type="prob") # type="prob" returns class probabilities instead of class labels
head(predictedProbs)
## Bad Good
## 1 0.33876152 0.6612385
## 2 0.08259168 0.9174083
## 3 0.55882166 0.4411783
## 4 0.50963974 0.4903603
## 5 0.14219994 0.8578001
## 6 0.33578260 0.6642174
cat(predictedProbs[[2]])
## 0.6612385 0.9174083 0.4411783 0.4903603 0.8578001 0.6642174 0.7725987 0.9351605 0.8174377 0.9298628 0.8322412 0.754578 0.2713566 0.4985467 0.7759904 0.9741471 0.819931 0.637725 0.9609196 0.8944426 0.6872734 0.8574688 0.7693772 0.5545293 0.8380217 0.6880073 0.4945644 0.8997162 0.7869101 0.8475205 0.6285916 0.7474174 0.9346821 0.4220624 0.653359 0.6362846 0.737069 0.9604411 0.3895535 0.8909656 0.7728124 0.7793097 0.9336168 0.3536514 0.4742933 0.2152898 0.4224262 0.9434183 0.9377611 0.7320562 0.8977547 0.9570366 0.8594884 0.8998881 0.9378063 0.3024888 0.9549617 0.5719311 0.9237349 0.8808034 0.9166476 0.901844 0.223118 0.9568923 0.7677628 0.674256 0.2552843 0.7630487 0.4327053 0.7073402 0.5014827 0.7637778 0.6603249 0.1076062 0.8741949 0.506621 0.8620246 0.7431002 0.9218367 0.5273419 0.9418496 0.7723959 0.8514562 0.7672604 0.5504139 0.759398 0.5689542 0.9646418 0.8187491 0.907998 0.5235187 0.534559 0.720365 0.9287172 0.8404767 0.9388071 0.7005659 0.6899153 0.8929507 0.9426763 0.9232948 0.5606805 0.815046 0.6687903 0.9423604 0.6267822 0.7799241 0.9307077 0.359694 0.9284767 0.7913503 0.6961812 0.8439401 0.7099994 0.9245359 0.8658003 0.7919118 0.8569082 0.7668951 0.8899895 0.7375167 0.9033209 0.7337488 0.8708714 0.8973274 0.7170046 0.7280797 0.375912 0.7733143 0.8602702 0.9001194 0.4535604 0.9433004 0.9057258 0.8540322 0.9159102 0.9009756 0.5356577 0.3976582 0.3909891 0.9018596 0.9521543 0.968656 0.4435137 0.934002 0.737911 0.2368053 0.9426523 0.7112473 0.3514163 0.3117373 0.8926258 0.8946715 0.9579374 0.4691112 0.8521316 0.8630506 0.7802455 0.7745462 0.7908171 0.906468 0.5194479 0.48291 0.969322 0.4470405 0.9004057 0.8702441 0.4516686 0.7417544 0.692164 0.8952413 0.3476618 0.8729552 0.4613622 0.677186 0.3372772 0.9468024 0.8489456 0.4398686 0.3390773 0.8711777 0.5763659 0.9583501 0.3587548 0.7613046 0.9286051 0.9000054 0.6407399 0.5882035 0.4202011 0.8830693 0.9571663 0.8331337 0.9175973 0.919961 0.7035797 0.4202835 0.8891012 0.3814957 0.3724153
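As one further sketch, assuming the pROC package is installed (it is not loaded anywhere above), the Good-class probabilities can be turned into an ROC curve and AUC:
library(pROC)
# "Bad" is treated as the control level and "Good" as the case level
rocCurve <- roc(response = GermanCreditTest$Class,
                predictor = predictedProbs$Good,
                levels = c("Bad", "Good"))
auc(rocCurve)                        # area under the ROC curve
plot(rocCurve, legacy.axes = TRUE)   # ROC curve with 1 - specificity on the x-axis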