library(caret)
## 필요한 패키지를 로딩중입니다: ggplot2
## 필요한 패키지를 로딩중입니다: lattice
#install.packages("AppliedPredictiveModeling")
library(AppliedPredictiveModeling)
library(dplyr)
##
## 다음의 패키지를 부착합니다: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
data(twoClassData)
head(predictors)
## PredictorA PredictorB
## 1 0.1582 0.1609
## 2 0.6552 0.4918
## 3 0.7060 0.6333
## 4 0.1992 0.0881
## 5 0.3952 0.4152
## 6 0.4250 0.2988
head(classes)
## [1] Class2 Class2 Class2 Class2 Class2 Class2
## Levels: Class1 Class2
length(classes)
## [1] 208
set.seed(1)
traingRows<-createDataPartition(classes,p=.8,list=FALSE)
head(traingRows)
## Resample1
## [1,] 1
## [2,] 2
## [3,] 3
## [4,] 7
## [5,] 8
## [6,] 9
trainPredictors<-predictors[traingRows,]
trainClasses<-classes[traingRows]
testPredictors<-predictors[-traingRows,]
testClasses<-classes[-traingRows]
str(trainPredictors)
## 'data.frame': 167 obs. of 2 variables:
## $ PredictorA: num 0.1582 0.6552 0.706 0.0658 0.3086 ...
## $ PredictorB: num 0.161 0.492 0.633 0.179 0.28 ...
str(testPredictors)
## 'data.frame': 41 obs. of 2 variables:
## $ PredictorA: num 0.1992 0.3952 0.425 0.0847 0.2909 ...
## $ PredictorB: num 0.0881 0.4152 0.2988 0.0548 0.3021 ...
repeatedSplit<-createDataPartition(trainClasses,p=0.8,
times=3)
str(repeatedSplit)
## List of 3
## $ Resample1: int [1:135] 1 2 3 4 5 6 8 9 10 11 ...
## $ Resample2: int [1:135] 1 2 3 4 5 6 7 8 9 11 ...
## $ Resample3: int [1:135] 1 2 3 4 5 6 9 10 11 14 ...
set.seed(1)
cvSplit<-createFolds(trainClasses,k=10,returnTrain = TRUE)
str(cvSplit)
## List of 10
## $ Fold01: int [1:150] 1 2 4 5 6 7 8 10 11 13 ...
## $ Fold02: int [1:150] 1 2 3 4 6 7 8 9 10 11 ...
## $ Fold03: int [1:150] 1 3 4 5 6 7 8 9 10 11 ...
## $ Fold04: int [1:150] 1 2 3 4 5 6 7 8 9 10 ...
## $ Fold05: int [1:150] 2 3 4 5 6 7 8 9 10 11 ...
## $ Fold06: int [1:150] 1 2 3 4 5 6 7 8 9 11 ...
## $ Fold07: int [1:150] 1 2 3 4 5 6 7 9 10 12 ...
## $ Fold08: int [1:151] 1 2 3 4 5 6 8 9 10 11 ...
## $ Fold09: int [1:151] 1 2 3 5 6 7 8 9 10 11 ...
## $ Fold10: int [1:151] 1 2 3 4 5 7 8 9 10 11 ...
fold1<-cvSplit[[1]]
length(fold1)
## [1] 150
cvPredictors1<-trainPredictors[fold1,]
cvClasses1<-trainClasses[fold1]
nrow(trainPredictors)
## [1] 167
nrow(cvPredictors1)
## [1] 150
library(caret)
data("GermanCredit")
glimpse(GermanCredit)
## Rows: 1,000
## Columns: 62
## $ Duration <int> 6, 48, 12, 42, 24, 36, 24, 36, …
## $ Amount <int> 1169, 5951, 2096, 7882, 4870, 9…
## $ InstallmentRatePercentage <int> 4, 2, 2, 2, 3, 2, 3, 2, 2, 4, 3…
## $ ResidenceDuration <int> 4, 2, 3, 4, 4, 4, 4, 2, 4, 2, 1…
## $ Age <int> 67, 22, 49, 45, 53, 35, 53, 35,…
## $ NumberExistingCredits <int> 2, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1…
## $ NumberPeopleMaintenance <int> 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1…
## $ Telephone <dbl> 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1…
## $ ForeignWorker <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ Class <fct> Good, Bad, Good, Good, Bad, Goo…
## $ CheckingAccountStatus.lt.0 <dbl> 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0…
## $ CheckingAccountStatus.0.to.200 <dbl> 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1…
## $ CheckingAccountStatus.gt.200 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ CheckingAccountStatus.none <dbl> 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0…
## $ CreditHistory.NoCredit.AllPaid <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ CreditHistory.ThisBank.AllPaid <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ CreditHistory.PaidDuly <dbl> 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1…
## $ CreditHistory.Delay <dbl> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0…
## $ CreditHistory.Critical <dbl> 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0…
## $ Purpose.NewCar <dbl> 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1…
## $ Purpose.UsedCar <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0…
## $ Purpose.Furniture.Equipment <dbl> 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0…
## $ Purpose.Radio.Television <dbl> 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0…
## $ Purpose.DomesticAppliance <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Purpose.Repairs <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Purpose.Education <dbl> 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0…
## $ Purpose.Vacation <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Purpose.Retraining <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Purpose.Business <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Purpose.Other <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ SavingsAccountBonds.lt.100 <dbl> 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1…
## $ SavingsAccountBonds.100.to.500 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ SavingsAccountBonds.500.to.1000 <dbl> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0…
## $ SavingsAccountBonds.gt.1000 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0…
## $ SavingsAccountBonds.Unknown <dbl> 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0…
## $ EmploymentDuration.lt.1 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ EmploymentDuration.1.to.4 <dbl> 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0…
## $ EmploymentDuration.4.to.7 <dbl> 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0…
## $ EmploymentDuration.gt.7 <dbl> 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0…
## $ EmploymentDuration.Unemployed <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0…
## $ Personal.Male.Divorced.Seperated <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0…
## $ Personal.Female.NotSingle <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ Personal.Male.Single <dbl> 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0…
## $ Personal.Male.Married.Widowed <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0…
## $ Personal.Female.Single <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ OtherDebtorsGuarantors.None <dbl> 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1…
## $ OtherDebtorsGuarantors.CoApplicant <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ OtherDebtorsGuarantors.Guarantor <dbl> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0…
## $ Property.RealEstate <dbl> 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0…
## $ Property.Insurance <dbl> 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0…
## $ Property.CarOther <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1…
## $ Property.Unknown <dbl> 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0…
## $ OtherInstallmentPlans.Bank <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ OtherInstallmentPlans.Stores <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ OtherInstallmentPlans.None <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ Housing.Rent <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1…
## $ Housing.Own <dbl> 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0…
## $ Housing.ForFree <dbl> 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0…
## $ Job.UnemployedUnskilled <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Job.UnskilledResident <dbl> 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0…
## $ Job.SkilledEmployee <dbl> 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1…
## $ Job.Management.SelfEmp.HighlyQualified <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0…
set.seed(1056)
table(GermanCredit$Class)
##
## Bad Good
## 300 700
set.seed(1056)
GermanCredit <- GermanCredit[, -nearZeroVar(GermanCredit)]
GermanCredit$CheckingAccountStatus.lt.0 <- NULL
GermanCredit$SavingsAccountBonds.lt.100 <- NULL
GermanCredit$EmploymentDuration.lt.1 <- NULL
GermanCredit$EmploymentDuration.Unemployed <- NULL
GermanCredit$Personal.Male.Married.Widowed <- NULL
GermanCredit$Property.Unknown <- NULL
GermanCredit$Housing.ForFree <- NULL
inTrain <- createDataPartition(GermanCredit$Class, p = .8)[[1]]
GermanCreditTrain <- GermanCredit[ inTrain, ]
GermanCreditTest <- GermanCredit[-inTrain, ]
svmFit<-train(Class~.,
data=GermanCreditTrain,
method='svmRadial',
preProc=c('center','scale'),
tuneLength=10,
trControl=trainControl(method="cv",
repeats=5,
classProbs = TRUE))
## Warning: `repeats` has no meaning for this resampling method.
svmFit
## Support Vector Machines with Radial Basis Function Kernel
##
## 800 samples
## 41 predictor
## 2 classes: 'Bad', 'Good'
##
## Pre-processing: centered (41), scaled (41)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 720, 720, 720, 720, 720, 720, ...
## Resampling results across tuning parameters:
##
## C Accuracy Kappa
## 0.25 0.75625 0.3858886
## 0.50 0.76500 0.4087530
## 1.00 0.77000 0.3961280
## 2.00 0.76375 0.3782815
## 4.00 0.76500 0.3688453
## 8.00 0.77000 0.3892362
## 16.00 0.75125 0.3151667
## 32.00 0.73500 0.2797416
## 64.00 0.73250 0.2770492
## 128.00 0.72875 0.2713074
##
## Tuning parameter 'sigma' was held constant at a value of 0.0138183
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.0138183 and C = 1.
plot(svmFit,scales=list(x=list(log=2)))

predicted<-predict(svmFit,GermanCreditTest)
predictedProbs<-predict(svmFit,newdata=GermanCreditTest,
type="prob")
head(predictedProbs)
## Bad Good
## 1 0.4238216 0.5761784
## 2 0.0730835 0.9269165
## 3 0.5650184 0.4349816
## 4 0.6932819 0.3067181
## 5 0.1754360 0.8245640
## 6 0.3223266 0.6776734