# 2301101

library(caret)

## 필요한 패키지를 로딩중입니다: ggplot2

## 필요한 패키지를 로딩중입니다: lattice

#install.packages("AppliedPredictiveModeling")
library(AppliedPredictiveModeling)
library(dplyr)

## 
## 다음의 패키지를 부착합니다: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Load the two-class example data set; after this call the objects
# `predictors` and `classes` are available and inspected below.
data("twoClassData")

# First rows of the predictor table.
head(predictors, n = 6L)

##   PredictorA PredictorB
## 1     0.1582     0.1609
## 2     0.6552     0.4918
## 3     0.7060     0.6333
## 4     0.1992     0.0881
## 5     0.3952     0.4152
## 6     0.4250     0.2988

# First outcome labels, followed by the total number of observations.
head(classes, n = 6L)

## [1] Class2 Class2 Class2 Class2 Class2 Class2
## Levels: Class1 Class2

length(classes)

## [1] 208

# Split the two-class data into training and test sets.
# The seed is fixed so the partition is reproducible.
set.seed(1)

# Row indices of the training partition (80% of the observations).
# list = FALSE returns the indices as a one-column matrix (see the
# printed header "Resample1") instead of a list.
trainingRows <- createDataPartition(classes, p = 0.8, list = FALSE)
head(trainingRows)

##      Resample1
## [1,]         1
## [2,]         2
## [3,]         3
## [4,]         7
## [5,]         8
## [6,]         9

# Rows selected above form the training set; negative indexing keeps
# every remaining row for the test set.
trainPredictors <- predictors[trainingRows, ]
trainClasses <- classes[trainingRows]
testPredictors <- predictors[-trainingRows, ]
testClasses <- classes[-trainingRows]
str(trainPredictors)

## 'data.frame':    167 obs. of  2 variables:
##  $ PredictorA: num  0.1582 0.6552 0.706 0.0658 0.3086 ...
##  $ PredictorB: num  0.161 0.492 0.633 0.179 0.28 ...

str(testPredictors)

## 'data.frame':    41 obs. of  2 variables:
##  $ PredictorA: num  0.1992 0.3952 0.425 0.0847 0.2909 ...
##  $ PredictorB: num  0.0881 0.4152 0.2988 0.0548 0.3021 ...

# Draw three separate 80% resamples of the training outcome. With the
# default list output, each resample is one element of the returned list.
# No seed is set here, so the draws continue from the current RNG state.
repeatedSplit <- createDataPartition(
  trainClasses,
  p = 0.8,
  times = 3
)
str(repeatedSplit)

## List of 3
##  $ Resample1: int [1:135] 1 2 3 4 5 6 8 9 10 11 ...
##  $ Resample2: int [1:135] 1 2 3 4 5 6 7 8 9 11 ...
##  $ Resample3: int [1:135] 1 2 3 4 5 6 9 10 11 14 ...

# Build 10 cross-validation folds on the training outcome.
# returnTrain = TRUE makes each element hold the rows used for fitting
# (about 150 of 167), not the held-out rows.
set.seed(1)
cvSplit <- createFolds(
  trainClasses,
  k = 10,
  returnTrain = TRUE
)
str(cvSplit)

## List of 10
##  $ Fold01: int [1:150] 1 2 4 5 6 7 8 10 11 13 ...
##  $ Fold02: int [1:150] 1 2 3 4 6 7 8 9 10 11 ...
##  $ Fold03: int [1:150] 1 3 4 5 6 7 8 9 10 11 ...
##  $ Fold04: int [1:150] 1 2 3 4 5 6 7 8 9 10 ...
##  $ Fold05: int [1:150] 2 3 4 5 6 7 8 9 10 11 ...
##  $ Fold06: int [1:150] 1 2 3 4 5 6 7 8 9 11 ...
##  $ Fold07: int [1:150] 1 2 3 4 5 6 7 9 10 12 ...
##  $ Fold08: int [1:151] 1 2 3 4 5 6 8 9 10 11 ...
##  $ Fold09: int [1:151] 1 2 3 5 6 7 8 9 10 11 ...
##  $ Fold10: int [1:151] 1 2 3 4 5 7 8 9 10 11 ...

# Take the training-row indices of the first fold (named "Fold01").
fold1 <- cvSplit[["Fold01"]]
length(fold1)

## [1] 150

# Subset the training predictors and outcome to that fold, then compare
# the row counts of the full training set and the fold subset.
cvPredictors1 <- trainPredictors[fold1, ]
cvClasses1 <- trainClasses[fold1]
nrow(trainPredictors)

## [1] 167

nrow(cvPredictors1)

## [1] 150

library(caret)

# Load the GermanCredit data set and print a compact column-by-column
# overview (name, type, first values).
data(GermanCredit)
dplyr::glimpse(x = GermanCredit)

## Rows: 1,000
## Columns: 62
## $ Duration                               <int> 6, 48, 12, 42, 24, 36, 24, 36, …
## $ Amount                                 <int> 1169, 5951, 2096, 7882, 4870, 9…
## $ InstallmentRatePercentage              <int> 4, 2, 2, 2, 3, 2, 3, 2, 2, 4, 3…
## $ ResidenceDuration                      <int> 4, 2, 3, 4, 4, 4, 4, 2, 4, 2, 1…
## $ Age                                    <int> 67, 22, 49, 45, 53, 35, 53, 35,…
## $ NumberExistingCredits                  <int> 2, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1…
## $ NumberPeopleMaintenance                <int> 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1…
## $ Telephone                              <dbl> 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1…
## $ ForeignWorker                          <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ Class                                  <fct> Good, Bad, Good, Good, Bad, Goo…
## $ CheckingAccountStatus.lt.0             <dbl> 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0…
## $ CheckingAccountStatus.0.to.200         <dbl> 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1…
## $ CheckingAccountStatus.gt.200           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ CheckingAccountStatus.none             <dbl> 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0…
## $ CreditHistory.NoCredit.AllPaid         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ CreditHistory.ThisBank.AllPaid         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ CreditHistory.PaidDuly                 <dbl> 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1…
## $ CreditHistory.Delay                    <dbl> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0…
## $ CreditHistory.Critical                 <dbl> 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0…
## $ Purpose.NewCar                         <dbl> 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1…
## $ Purpose.UsedCar                        <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0…
## $ Purpose.Furniture.Equipment            <dbl> 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0…
## $ Purpose.Radio.Television               <dbl> 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0…
## $ Purpose.DomesticAppliance              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Purpose.Repairs                        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Purpose.Education                      <dbl> 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0…
## $ Purpose.Vacation                       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Purpose.Retraining                     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Purpose.Business                       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Purpose.Other                          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ SavingsAccountBonds.lt.100             <dbl> 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1…
## $ SavingsAccountBonds.100.to.500         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ SavingsAccountBonds.500.to.1000        <dbl> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0…
## $ SavingsAccountBonds.gt.1000            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0…
## $ SavingsAccountBonds.Unknown            <dbl> 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0…
## $ EmploymentDuration.lt.1                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ EmploymentDuration.1.to.4              <dbl> 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0…
## $ EmploymentDuration.4.to.7              <dbl> 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0…
## $ EmploymentDuration.gt.7                <dbl> 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0…
## $ EmploymentDuration.Unemployed          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0…
## $ Personal.Male.Divorced.Seperated       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0…
## $ Personal.Female.NotSingle              <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ Personal.Male.Single                   <dbl> 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0…
## $ Personal.Male.Married.Widowed          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0…
## $ Personal.Female.Single                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ OtherDebtorsGuarantors.None            <dbl> 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1…
## $ OtherDebtorsGuarantors.CoApplicant     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ OtherDebtorsGuarantors.Guarantor       <dbl> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0…
## $ Property.RealEstate                    <dbl> 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0…
## $ Property.Insurance                     <dbl> 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0…
## $ Property.CarOther                      <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1…
## $ Property.Unknown                       <dbl> 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0…
## $ OtherInstallmentPlans.Bank             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ OtherInstallmentPlans.Stores           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ OtherInstallmentPlans.None             <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ Housing.Rent                           <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1…
## $ Housing.Own                            <dbl> 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0…
## $ Housing.ForFree                        <dbl> 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0…
## $ Job.UnemployedUnskilled                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Job.UnskilledResident                  <dbl> 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0…
## $ Job.SkilledEmployee                    <dbl> 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1…
## $ Job.Management.SelfEmp.HighlyQualified <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0…

# Class balance of the outcome. No seed is needed here: table() does not
# use the random number generator.
table(GermanCredit$Class)

## 
##  Bad Good 
##  300  700

# Prepare the GermanCredit data and split it into training and test sets.
# The seed is fixed so the partition below is reproducible.
set.seed(1056)

# Drop near-zero-variance columns. Guard the empty case: if nearZeroVar()
# finds nothing it returns integer(0), and `df[, -integer(0)]` would then
# select zero columns instead of keeping all of them.
nzvCols <- nearZeroVar(GermanCredit)
if (length(nzvCols) > 0) {
  GermanCredit <- GermanCredit[, -nzvCols]
}

# Remove one indicator column from several dummy-coded groups.
# NOTE(review): presumably done to avoid linear dependencies between the
# indicator columns of one factor -- confirm against the data description.
redundantDummies <- c(
  "CheckingAccountStatus.lt.0",
  "SavingsAccountBonds.lt.100",
  "EmploymentDuration.lt.1",
  "EmploymentDuration.Unemployed",
  "Personal.Male.Married.Widowed",
  "Property.Unknown",
  "Housing.ForFree"
)
# Selecting by name keeps the remaining column order and is a no-op for
# any listed column that is no longer present.
GermanCredit <- GermanCredit[
  ,
  !(names(GermanCredit) %in% redundantDummies),
  drop = FALSE
]

# 80% of the rows go to training; the remaining rows form the test set.
inTrain <- createDataPartition(GermanCredit$Class, p = 0.8)[[1]]
GermanCreditTrain <- GermanCredit[inTrain, ]
GermanCreditTest <- GermanCredit[-inTrain, ]

# Tune a radial-basis-function SVM on the training split.
#   - predictors are centered and scaled before fitting
#   - tuneLength = 10 evaluates ten candidate values of the cost C
#   - resampling is plain 10-fold cross-validation
#   - classProbs = TRUE so class probabilities can be predicted later
# `repeats` is deliberately not passed: it has no meaning for
# method = "cv" and only triggered a warning. To actually repeat the
# cross-validation, use method = "repeatedcv" together with `repeats`.
svmFit <- train(
  Class ~ .,
  data = GermanCreditTrain,
  method = "svmRadial",
  preProcess = c("center", "scale"),
  tuneLength = 10,
  trControl = trainControl(
    method = "cv",
    classProbs = TRUE
  )
)

# Show the resampling summary and the selected tuning parameters.
print(svmFit)

## Support Vector Machines with Radial Basis Function Kernel 
## 
## 800 samples
##  41 predictor
##   2 classes: 'Bad', 'Good' 
## 
## Pre-processing: centered (41), scaled (41) 
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 720, 720, 720, 720, 720, 720, ... 
## Resampling results across tuning parameters:
## 
##   C       Accuracy  Kappa    
##     0.25  0.75625   0.3858886
##     0.50  0.76500   0.4087530
##     1.00  0.77000   0.3961280
##     2.00  0.76375   0.3782815
##     4.00  0.76500   0.3688453
##     8.00  0.77000   0.3892362
##    16.00  0.75125   0.3151667
##    32.00  0.73500   0.2797416
##    64.00  0.73250   0.2770492
##   128.00  0.72875   0.2713074
## 
## Tuning parameter 'sigma' was held constant at a value of 0.0138183
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.0138183 and C = 1.

# Resampling profile of the tuned model, with the x axis on a log2 scale.
plot(svmFit, scales = list(x = list(log = 2)))

# Predicted class labels for the held-out test set.
predicted <- predict(svmFit, newdata = GermanCreditTest)

# Predicted class probabilities (one column per class) for the same rows.
predictedProbs <- predict(
  svmFit,
  newdata = GermanCreditTest,
  type = "prob"
)
head(predictedProbs)

##         Bad      Good
## 1 0.4238216 0.5761784
## 2 0.0730835 0.9269165
## 3 0.5650184 0.4349816
## 4 0.6932819 0.3067181
## 5 0.1754360 0.8245640
## 6 0.3223266 0.6776734

# 2301101
#
# Jeong Sun Mi
#
# 2023-01-10