library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(AppliedPredictiveModeling)
# Load the two-class example data set and preview its first rows.
# NOTE(review): this call is presumably what creates both `predictors` and
# `classes` used further down -- confirm against the package documentation.
data("twoClassData")
head(predictors, n = 6L)
##   PredictorA PredictorB
## 1     0.1582     0.1609
## 2     0.6552     0.4918
## 3     0.7060     0.6333
## 4     0.1992     0.0881
## 5     0.3952     0.4152
## 6     0.4250     0.2988
# Confirm the container type, then list every column with its type.
print(class(predictors))
## [1] "data.frame"
dplyr::glimpse(predictors)
## Rows: 208
## Columns: 2
## $ PredictorA <dbl> 0.1582, 0.6552, 0.7060, 0.1992, 0.3952, 0.4250, 0.0658, 0.3…
## $ PredictorB <dbl> 0.1609, 0.4918, 0.6333, 0.0881, 0.4152, 0.2988, 0.1786, 0.2…
# Fix the RNG so the random partition is reproducible, then draw 80% of the
# observations for training. list = FALSE lets the result be used directly
# as a row index below.
set.seed(1)
traingRows <- createDataPartition(y = classes, p = 0.8, list = FALSE)

# Rows picked above form the training set ...
trainPredictors <- predictors[traingRows, ]
trainClasses <- classes[traingRows]

# ... and every remaining row is held out for testing.
testPredictors <- predictors[-traingRows, ]
testClasses <- classes[-traingRows]

# Check the size and structure of both halves.
str(trainPredictors)
## 'data.frame':    167 obs. of  2 variables:
##  $ PredictorA: num  0.1582 0.6552 0.706 0.0658 0.3086 ...
##  $ PredictorB: num  0.161 0.492 0.633 0.179 0.28 ...
str(testPredictors)
## 'data.frame':    41 obs. of  2 variables:
##  $ PredictorA: num  0.1992 0.3952 0.425 0.0847 0.2909 ...
##  $ PredictorB: num  0.0881 0.4152 0.2988 0.0548 0.3021 ...
# Draw three separate 80% partitions of the training classes; the result is
# a list with one vector of row indices per resample.
repeatedSplit <- createDataPartition(y = trainClasses, times = 3, p = 0.8)
str(repeatedSplit)
## List of 3
##  $ Resample1: int [1:135] 1 2 3 4 5 6 8 9 10 11 ...
##  $ Resample2: int [1:135] 1 2 3 4 5 6 7 8 9 11 ...
##  $ Resample3: int [1:135] 1 2 3 4 5 6 9 10 11 14 ...
# Reseed, then build 10 cross-validation folds over the training classes.
# returnTrain = TRUE: each list element holds the rows to fit on (about 150
# of the 167 training rows), not the held-out rows.
set.seed(1)
cvSplit <- createFolds(y = trainClasses, k = 10, returnTrain = TRUE)
str(cvSplit)
## List of 10
##  $ Fold01: int [1:150] 1 2 4 5 6 7 8 10 11 13 ...
##  $ Fold02: int [1:150] 1 2 3 4 6 7 8 9 10 11 ...
##  $ Fold03: int [1:150] 1 3 4 5 6 7 8 9 10 11 ...
##  $ Fold04: int [1:150] 1 2 3 4 5 6 7 8 9 10 ...
##  $ Fold05: int [1:150] 2 3 4 5 6 7 8 9 10 11 ...
##  $ Fold06: int [1:150] 1 2 3 4 5 6 7 8 9 11 ...
##  $ Fold07: int [1:150] 1 2 3 4 5 6 7 9 10 12 ...
##  $ Fold08: int [1:151] 1 2 3 4 5 6 8 9 10 11 ...
##  $ Fold09: int [1:151] 1 2 3 5 6 7 8 9 10 11 ...
##  $ Fold10: int [1:151] 1 2 3 4 5 7 8 9 10 11 ...
# Row indices that make up the fitting portion of the first fold.
fold1 <- cvSplit[[1L]]
length(fold1)
## [1] 150
# Subset the training predictors and classes down to that fold.
cvPredictors1 <- trainPredictors[fold1, ]
cvClasses <- trainClasses[fold1]
# Total number of training rows, for comparison with the fold size above.
nrow(trainPredictors)
## [1] 167
# Choosing tuning parameters ----
# Load the GermanCredit data set and list its columns with their types.
data(GermanCredit)
dplyr::glimpse(GermanCredit)
## Rows: 1,000
## Columns: 62
## $ Duration                               <int> 6, 48, 12, 42, 24, 36, 24, 36, …
## $ Amount                                 <int> 1169, 5951, 2096, 7882, 4870, 9…
## $ InstallmentRatePercentage              <int> 4, 2, 2, 2, 3, 2, 3, 2, 2, 4, 3…
## $ ResidenceDuration                      <int> 4, 2, 3, 4, 4, 4, 4, 2, 4, 2, 1…
## $ Age                                    <int> 67, 22, 49, 45, 53, 35, 53, 35,…
## $ NumberExistingCredits                  <int> 2, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1…
## $ NumberPeopleMaintenance                <int> 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1…
## $ Telephone                              <dbl> 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1…
## $ ForeignWorker                          <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ Class                                  <fct> Good, Bad, Good, Good, Bad, Goo…
## $ CheckingAccountStatus.lt.0             <dbl> 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0…
## $ CheckingAccountStatus.0.to.200         <dbl> 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1…
## $ CheckingAccountStatus.gt.200           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ CheckingAccountStatus.none             <dbl> 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0…
## $ CreditHistory.NoCredit.AllPaid         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ CreditHistory.ThisBank.AllPaid         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ CreditHistory.PaidDuly                 <dbl> 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1…
## $ CreditHistory.Delay                    <dbl> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0…
## $ CreditHistory.Critical                 <dbl> 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0…
## $ Purpose.NewCar                         <dbl> 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1…
## $ Purpose.UsedCar                        <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0…
## $ Purpose.Furniture.Equipment            <dbl> 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0…
## $ Purpose.Radio.Television               <dbl> 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0…
## $ Purpose.DomesticAppliance              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Purpose.Repairs                        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Purpose.Education                      <dbl> 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0…
## $ Purpose.Vacation                       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Purpose.Retraining                     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Purpose.Business                       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Purpose.Other                          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ SavingsAccountBonds.lt.100             <dbl> 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1…
## $ SavingsAccountBonds.100.to.500         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ SavingsAccountBonds.500.to.1000        <dbl> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0…
## $ SavingsAccountBonds.gt.1000            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0…
## $ SavingsAccountBonds.Unknown            <dbl> 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0…
## $ EmploymentDuration.lt.1                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ EmploymentDuration.1.to.4              <dbl> 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0…
## $ EmploymentDuration.4.to.7              <dbl> 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0…
## $ EmploymentDuration.gt.7                <dbl> 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0…
## $ EmploymentDuration.Unemployed          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0…
## $ Personal.Male.Divorced.Seperated       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0…
## $ Personal.Female.NotSingle              <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ Personal.Male.Single                   <dbl> 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0…
## $ Personal.Male.Married.Widowed          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0…
## $ Personal.Female.Single                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ OtherDebtorsGuarantors.None            <dbl> 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1…
## $ OtherDebtorsGuarantors.CoApplicant     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ OtherDebtorsGuarantors.Guarantor       <dbl> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0…
## $ Property.RealEstate                    <dbl> 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0…
## $ Property.Insurance                     <dbl> 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0…
## $ Property.CarOther                      <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1…
## $ Property.Unknown                       <dbl> 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0…
## $ OtherInstallmentPlans.Bank             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ OtherInstallmentPlans.Stores           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ OtherInstallmentPlans.None             <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ Housing.Rent                           <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1…
## $ Housing.Own                            <dbl> 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0…
## $ Housing.ForFree                        <dbl> 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0…
## $ Job.UnemployedUnskilled                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Job.UnskilledResident                  <dbl> 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0…
## $ Job.SkilledEmployee                    <dbl> 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1…
## $ Job.Management.SelfEmp.HighlyQualified <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0…
# Fix the RNG before the random train/test partition drawn further down,
# then tabulate how many observations fall in each outcome class.
set.seed(1056)
table(GermanCredit[["Class"]])
## 
##  Bad Good 
##  300  700
# Remove near-zero-variance columns. nearZeroVar() is used here as a vector
# of column positions; if nothing is flagged that vector is empty, and
# negative indexing with an empty vector would select *no* columns at all.
# Only subset when there is actually something to drop.
nzvColumns <- nearZeroVar(GermanCredit)
if (length(nzvColumns) > 0) {
  GermanCredit <- GermanCredit[, -nzvColumns, drop = FALSE]
}

# Drop selected indicator columns by hand.
# NOTE(review): presumably this removes linear dependencies among the
# dummy-coded groups (CheckingAccountStatus.*, SavingsAccountBonds.*, ...)
# -- confirm the intent.
GermanCredit$CheckingAccountStatus.lt.0 <- NULL
GermanCredit$SavingsAccountBonds.lt.100 <- NULL
GermanCredit$EmploymentDuration.lt.1 <- NULL
GermanCredit$EmploymentDuration.Unemployed <- NULL
GermanCredit$Personal.Male.Married.Widowed <- NULL
GermanCredit$Property.Unknown <- NULL
GermanCredit$Housing.ForFree <- NULL

# Draw 80% of the rows for training. createDataPartition() returns a list
# here, so `[[1]]` extracts the index vector; passing list = FALSE would be
# an alternative to `[[1]]`.
inTrain <- createDataPartition(GermanCredit$Class, p = 0.8)[[1]]
GermanCreditTrain <- GermanCredit[inTrain, ]
GermanCreditTest <- GermanCredit[-inTrain, ]

# Fit a radial-kernel SVM to predict Class from all remaining columns,
# tuning over 10 candidate cost values with 10-fold cross-validation.
#
# `repeats` was removed from trainControl(): with method = "cv" caret
# ignores it and warns that it has no meaning for this resampling method.
# Resampling is unchanged (plain 10-fold CV). To actually repeat the
# cross-validation, use method = "repeatedcv" together with `repeats`.
svmFit <- train(
  Class ~ .,
  data = GermanCreditTrain,
  method = "svmRadial", # e.g. "nnet" would fit a neural network instead
  # Spelled out in full rather than relying on partial matching of `preProc`.
  preProcess = c("center", "scale", "BoxCox"),
  tuneLength = 10, # evaluate 10 candidate values of the tuning parameter
  trControl = trainControl(
    method = "cv", # resampling scheme used to evaluate each candidate
    classProbs = TRUE # class probabilities are requested from predict() later
  )
)
# Print the resampling results for every candidate and the selected model.
svmFit
## Support Vector Machines with Radial Basis Function Kernel 
## 
## 800 samples
##  41 predictor
##   2 classes: 'Bad', 'Good' 
## 
## Pre-processing: centered (41), scaled (41), Box-Cox transformation (6) 
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 720, 720, 720, 720, 720, 720, ... 
## Resampling results across tuning parameters:
## 
##   C       Accuracy  Kappa    
##     0.25  0.76000   0.3910287
##     0.50  0.76375   0.4050255
##     1.00  0.76625   0.3885478
##     2.00  0.76625   0.3818034
##     4.00  0.76125   0.3594753
##     8.00  0.75375   0.3446185
##    16.00  0.75375   0.3236219
##    32.00  0.73375   0.2706737
##    64.00  0.73250   0.2763079
##   128.00  0.72250   0.2524035
## 
## Tuning parameter 'sigma' was held constant at a value of 0.01367206
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.01367206 and C = 1.
# Plot the resampling profile with the x axis on a log2 scale. The argument
# is spelled `scales` in full; `scale` only worked through partial argument
# matching.
plot(svmFit, scales = list(x = list(log = 2)))

# Predict a class label for every held-out observation and print them.
predicted <- predict(svmFit, newdata = GermanCreditTest)
predicted
##   [1] Good Good Bad  Bad  Good Good Good Good Good Good Good Good Bad  Bad  Good
##  [16] Good Good Good Good Good Good Good Good Good Good Good Bad  Good Good Good
##  [31] Good Good Good Bad  Good Good Good Good Bad  Good Good Good Good Bad  Bad 
##  [46] Bad  Bad  Good Good Good Good Good Good Good Good Bad  Good Good Good Good
##  [61] Good Good Bad  Good Good Good Bad  Good Bad  Good Good Good Good Bad  Good
##  [76] Good Good Good Good Good Good Good Good Good Good Good Good Good Good Good
##  [91] Good Good Good Good Good Good Good Good Good Good Good Good Good Good Good
## [106] Good Good Good Bad  Good Good Good Good Good Good Good Good Good Good Good
## [121] Good Good Good Good Good Good Good Bad  Good Good Good Bad  Good Good Good
## [136] Good Good Good Bad  Bad  Good Good Good Bad  Good Good Bad  Good Good Bad 
## [151] Bad  Good Good Good Bad  Good Good Good Good Good Good Good Bad  Good Bad 
## [166] Good Good Bad  Good Good Good Bad  Good Bad  Good Bad  Good Good Bad  Bad 
## [181] Good Good Good Bad  Good Good Good Good Good Bad  Good Good Good Good Good
## [196] Good Bad  Good Bad  Bad 
## Levels: Bad Good
# Request per-class probabilities instead of labels; type = "prob" is the
# option to use for that. The result has one column per class.
predictedProbs <- predict(svmFit, newdata = GermanCreditTest, type = "prob")
head(predictedProbs)
##          Bad      Good
## 1 0.33876152 0.6612385
## 2 0.08259168 0.9174083
## 3 0.55882166 0.4411783
## 4 0.50963974 0.4903603
## 5 0.14219994 0.8578001
## 6 0.33578260 0.6642174
# Write out every value of the second probability column (the "Good" class
# in the preview above), separated by spaces.
cat(predictedProbs[, 2])
## 0.6612385 0.9174083 0.4411783 0.4903603 0.8578001 0.6642174 0.7725987 0.9351605 0.8174377 0.9298628 0.8322412 0.754578 0.2713566 0.4985467 0.7759904 0.9741471 0.819931 0.637725 0.9609196 0.8944426 0.6872734 0.8574688 0.7693772 0.5545293 0.8380217 0.6880073 0.4945644 0.8997162 0.7869101 0.8475205 0.6285916 0.7474174 0.9346821 0.4220624 0.653359 0.6362846 0.737069 0.9604411 0.3895535 0.8909656 0.7728124 0.7793097 0.9336168 0.3536514 0.4742933 0.2152898 0.4224262 0.9434183 0.9377611 0.7320562 0.8977547 0.9570366 0.8594884 0.8998881 0.9378063 0.3024888 0.9549617 0.5719311 0.9237349 0.8808034 0.9166476 0.901844 0.223118 0.9568923 0.7677628 0.674256 0.2552843 0.7630487 0.4327053 0.7073402 0.5014827 0.7637778 0.6603249 0.1076062 0.8741949 0.506621 0.8620246 0.7431002 0.9218367 0.5273419 0.9418496 0.7723959 0.8514562 0.7672604 0.5504139 0.759398 0.5689542 0.9646418 0.8187491 0.907998 0.5235187 0.534559 0.720365 0.9287172 0.8404767 0.9388071 0.7005659 0.6899153 0.8929507 0.9426763 0.9232948 0.5606805 0.815046 0.6687903 0.9423604 0.6267822 0.7799241 0.9307077 0.359694 0.9284767 0.7913503 0.6961812 0.8439401 0.7099994 0.9245359 0.8658003 0.7919118 0.8569082 0.7668951 0.8899895 0.7375167 0.9033209 0.7337488 0.8708714 0.8973274 0.7170046 0.7280797 0.375912 0.7733143 0.8602702 0.9001194 0.4535604 0.9433004 0.9057258 0.8540322 0.9159102 0.9009756 0.5356577 0.3976582 0.3909891 0.9018596 0.9521543 0.968656 0.4435137 0.934002 0.737911 0.2368053 0.9426523 0.7112473 0.3514163 0.3117373 0.8926258 0.8946715 0.9579374 0.4691112 0.8521316 0.8630506 0.7802455 0.7745462 0.7908171 0.906468 0.5194479 0.48291 0.969322 0.4470405 0.9004057 0.8702441 0.4516686 0.7417544 0.692164 0.8952413 0.3476618 0.8729552 0.4613622 0.677186 0.3372772 0.9468024 0.8489456 0.4398686 0.3390773 0.8711777 0.5763659 0.9583501 0.3587548 0.7613046 0.9286051 0.9000054 0.6407399 0.5882035 0.4202011 0.8830693 0.9571663 0.8331337 0.9175973 0.919961 0.7035797 0.4202835 0.8891012 0.3814957 0.3724153