library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(AppliedPredictiveModeling)
data(twoClassData)
head(predictors)
## PredictorA PredictorB
## 1 0.1582 0.1609
## 2 0.6552 0.4918
## 3 0.7060 0.6333
## 4 0.1992 0.0881
## 5 0.3952 0.4152
## 6 0.4250 0.2988
class(predictors)
## [1] "data.frame"
glimpse(predictors)
## Rows: 208
## Columns: 2
## $ PredictorA <dbl> 0.1582, 0.6552, 0.7060, 0.1992, 0.3952, 0.4250, 0.0658, 0.3…
## $ PredictorB <dbl> 0.1609, 0.4918, 0.6333, 0.0881, 0.4152, 0.2988, 0.1786, 0.2…
set.seed(1)
trainingRows<-createDataPartition(classes,p=.8,list=FALSE)
trainPredictors<-predictors[trainingRows,]
trainClasses<-classes[trainingRows]
testPredictors<-predictors[-trainingRows,]
testClasses<-classes[-trainingRows]
str(trainPredictors)
## 'data.frame': 167 obs. of 2 variables:
## $ PredictorA: num 0.1582 0.6552 0.706 0.0658 0.3086 ...
## $ PredictorB: num 0.161 0.492 0.633 0.179 0.28 ...
str(testPredictors)
## 'data.frame': 41 obs. of 2 variables:
## $ PredictorA: num 0.1992 0.3952 0.425 0.0847 0.2909 ...
## $ PredictorB: num 0.0881 0.4152 0.2988 0.0548 0.3021 ...
repeatedSplit<-createDataPartition(trainClasses,p=.8,times=3)
str(repeatedSplit)
## List of 3
## $ Resample1: int [1:135] 1 2 3 4 5 6 8 9 10 11 ...
## $ Resample2: int [1:135] 1 2 3 4 5 6 7 8 9 11 ...
## $ Resample3: int [1:135] 1 2 3 4 5 6 9 10 11 14 ...
set.seed(1)
cvSplit<-createFolds(trainClasses,k=10,returnTrain=TRUE)
str(cvSplit)
## List of 10
## $ Fold01: int [1:150] 1 2 4 5 6 7 8 10 11 13 ...
## $ Fold02: int [1:150] 1 2 3 4 6 7 8 9 10 11 ...
## $ Fold03: int [1:150] 1 3 4 5 6 7 8 9 10 11 ...
## $ Fold04: int [1:150] 1 2 3 4 5 6 7 8 9 10 ...
## $ Fold05: int [1:150] 2 3 4 5 6 7 8 9 10 11 ...
## $ Fold06: int [1:150] 1 2 3 4 5 6 7 8 9 11 ...
## $ Fold07: int [1:150] 1 2 3 4 5 6 7 9 10 12 ...
## $ Fold08: int [1:151] 1 2 3 4 5 6 8 9 10 11 ...
## $ Fold09: int [1:151] 1 2 3 5 6 7 8 9 10 11 ...
## $ Fold10: int [1:151] 1 2 3 4 5 7 8 9 10 11 ...
fold1<-cvSplit[[1]]
length(fold1)
## [1] 150
cvPredictors1<-trainPredictors[fold1,]
cvClasses<-trainClasses[fold1]
nrow(trainPredictors)
## [1] 167
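The index vectors in cvSplit can also drive a manual resampling loop. The sketch below is not part of the original output and uses caret's knn3() purely as an illustrative classifier; any model could be substituted.
# Minimal manual 10-fold CV over cvSplit; knn3() is used only for illustration
cvAccuracy <- sapply(cvSplit, function(idx) {
  fit  <- knn3(trainPredictors[idx, ], trainClasses[idx], k = 5)
  pred <- predict(fit, trainPredictors[-idx, ], type = "class")
  mean(pred == trainClasses[-idx])   # accuracy on the held-out rows of this fold
})
mean(cvAccuracy)                     # average held-out accuracy across the 10 folds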
# Determining the tuning parameters
data("GermanCredit")
glimpse(GermanCredit)
## Rows: 1,000
## Columns: 62
## $ Duration <int> 6, 48, 12, 42, 24, 36, 24, 36, …
## $ Amount <int> 1169, 5951, 2096, 7882, 4870, 9…
## $ InstallmentRatePercentage <int> 4, 2, 2, 2, 3, 2, 3, 2, 2, 4, 3…
## $ ResidenceDuration <int> 4, 2, 3, 4, 4, 4, 4, 2, 4, 2, 1…
## $ Age <int> 67, 22, 49, 45, 53, 35, 53, 35,…
## $ NumberExistingCredits <int> 2, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1…
## $ NumberPeopleMaintenance <int> 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1…
## $ Telephone <dbl> 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1…
## $ ForeignWorker <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ Class <fct> Good, Bad, Good, Good, Bad, Goo…
## $ CheckingAccountStatus.lt.0 <dbl> 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0…
## $ CheckingAccountStatus.0.to.200 <dbl> 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1…
## $ CheckingAccountStatus.gt.200 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ CheckingAccountStatus.none <dbl> 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0…
## $ CreditHistory.NoCredit.AllPaid <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ CreditHistory.ThisBank.AllPaid <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ CreditHistory.PaidDuly <dbl> 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1…
## $ CreditHistory.Delay <dbl> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0…
## $ CreditHistory.Critical <dbl> 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0…
## $ Purpose.NewCar <dbl> 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1…
## $ Purpose.UsedCar <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0…
## $ Purpose.Furniture.Equipment <dbl> 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0…
## $ Purpose.Radio.Television <dbl> 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0…
## $ Purpose.DomesticAppliance <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Purpose.Repairs <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Purpose.Education <dbl> 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0…
## $ Purpose.Vacation <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Purpose.Retraining <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Purpose.Business <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Purpose.Other <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ SavingsAccountBonds.lt.100 <dbl> 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1…
## $ SavingsAccountBonds.100.to.500 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ SavingsAccountBonds.500.to.1000 <dbl> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0…
## $ SavingsAccountBonds.gt.1000 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0…
## $ SavingsAccountBonds.Unknown <dbl> 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0…
## $ EmploymentDuration.lt.1 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ EmploymentDuration.1.to.4 <dbl> 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0…
## $ EmploymentDuration.4.to.7 <dbl> 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0…
## $ EmploymentDuration.gt.7 <dbl> 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0…
## $ EmploymentDuration.Unemployed <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0…
## $ Personal.Male.Divorced.Seperated <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0…
## $ Personal.Female.NotSingle <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ Personal.Male.Single <dbl> 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0…
## $ Personal.Male.Married.Widowed <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0…
## $ Personal.Female.Single <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ OtherDebtorsGuarantors.None <dbl> 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1…
## $ OtherDebtorsGuarantors.CoApplicant <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ OtherDebtorsGuarantors.Guarantor <dbl> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0…
## $ Property.RealEstate <dbl> 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0…
## $ Property.Insurance <dbl> 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0…
## $ Property.CarOther <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1…
## $ Property.Unknown <dbl> 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0…
## $ OtherInstallmentPlans.Bank <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ OtherInstallmentPlans.Stores <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ OtherInstallmentPlans.None <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ Housing.Rent <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1…
## $ Housing.Own <dbl> 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0…
## $ Housing.ForFree <dbl> 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0…
## $ Job.UnemployedUnskilled <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Job.UnskilledResident <dbl> 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0…
## $ Job.SkilledEmployee <dbl> 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1…
## $ Job.Management.SelfEmp.HighlyQualified <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0…
set.seed(1056)
table(GermanCredit$Class)
##
## Bad Good
## 300 700
GermanCredit <- GermanCredit[, -nearZeroVar(GermanCredit)] # drop near-zero-variance predictors
# Drop dummy columns whose information is duplicated by the remaining indicators
GermanCredit$CheckingAccountStatus.lt.0 <- NULL
GermanCredit$SavingsAccountBonds.lt.100 <- NULL
GermanCredit$EmploymentDuration.lt.1 <- NULL
GermanCredit$EmploymentDuration.Unemployed <- NULL
GermanCredit$Personal.Male.Married.Widowed <- NULL
GermanCredit$Property.Unknown <- NULL
GermanCredit$Housing.ForFree <- NULL
inTrain <- createDataPartition(GermanCredit$Class, p = .8)[[1]] # list=FALSE could be used instead of taking [[1]]
GermanCreditTrain <- GermanCredit[ inTrain, ]
GermanCreditTest <- GermanCredit[-inTrain, ]
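As a quick sanity check (added here; no corresponding output in the original), createDataPartition() samples within each class, so the Good/Bad proportions in the training and test sets should closely match the 700/300 split of the full data:
round(prop.table(table(GermanCreditTrain$Class)), 3) # training-set class proportions
round(prop.table(table(GermanCreditTest$Class)), 3)  # test-set class proportions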
svmFit<-train(Class~.,
              data=GermanCreditTrain,
              method='svmRadial', # e.g. 'nnet' would fit a neural network instead
              preProc=c('center','scale','BoxCox'),
              tuneLength=10, # evaluate 10 candidate values of the cost parameter C
              trControl=trainControl(method="cv", # resampling method used to evaluate the model
                                     repeats=5, # ignored for method="cv"; use method="repeatedcv" for repeated CV
                                     classProbs = TRUE))
## Warning: `repeats` has no meaning for this resampling method.
svmFit
## Support Vector Machines with Radial Basis Function Kernel
##
## 800 samples
## 41 predictor
## 2 classes: 'Bad', 'Good'
##
## Pre-processing: centered (41), scaled (41), Box-Cox transformation (6)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 720, 720, 720, 720, 720, 720, ...
## Resampling results across tuning parameters:
##
## C Accuracy Kappa
## 0.25 0.76000 0.3910287
## 0.50 0.76375 0.4050255
## 1.00 0.76625 0.3885478
## 2.00 0.76625 0.3818034
## 4.00 0.76125 0.3594753
## 8.00 0.75375 0.3446185
## 16.00 0.75375 0.3236219
## 32.00 0.73375 0.2706737
## 64.00 0.73250 0.2763079
## 128.00 0.72250 0.2524035
##
## Tuning parameter 'sigma' was held constant at a value of 0.01367206
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.01367206 and C = 1.
plot(svmFit,scale=list(x=list(log=2))) # resampled accuracy vs. cost C, x-axis on a log2 scale

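The selected tuning values can also be pulled out of the fitted object directly rather than read off the printed summary:
svmFit$bestTune # sigma and C of the model chosen by resampling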
predicted<-predict(svmFit,GermanCreditTest)
predicted
## [1] Good Good Bad Bad Good Good Good Good Good Good Good Good Bad Bad Good
## [16] Good Good Good Good Good Good Good Good Good Good Good Bad Good Good Good
## [31] Good Good Good Bad Good Good Good Good Bad Good Good Good Good Bad Bad
## [46] Bad Bad Good Good Good Good Good Good Good Good Bad Good Good Good Good
## [61] Good Good Bad Good Good Good Bad Good Bad Good Good Good Good Bad Good
## [76] Good Good Good Good Good Good Good Good Good Good Good Good Good Good Good
## [91] Good Good Good Good Good Good Good Good Good Good Good Good Good Good Good
## [106] Good Good Good Bad Good Good Good Good Good Good Good Good Good Good Good
## [121] Good Good Good Good Good Good Good Bad Good Good Good Bad Good Good Good
## [136] Good Good Good Bad Bad Good Good Good Bad Good Good Bad Good Good Bad
## [151] Bad Good Good Good Bad Good Good Good Good Good Good Good Bad Good Bad
## [166] Good Good Bad Good Good Good Bad Good Bad Good Bad Good Good Bad Bad
## [181] Good Good Good Bad Good Good Good Good Good Bad Good Good Good Good Good
## [196] Good Bad Good Bad Bad
## Levels: Bad Good
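To summarize test-set performance, the predicted classes can be cross-tabulated against the observed classes with caret's confusionMatrix() (output not reproduced here):
# Accuracy, kappa, sensitivity, specificity, etc. on the hold-out set
confusionMatrix(data = predicted, reference = GermanCreditTest$Class)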
predictedProbs<-predict(svmFit,newdata=GermanCreditTest,
                        type="prob") # type="prob" returns class probabilities instead of class labels
head(predictedProbs)
## Bad Good
## 1 0.33876152 0.6612385
## 2 0.08259168 0.9174083
## 3 0.55882166 0.4411783
## 4 0.50963974 0.4903603
## 5 0.14219994 0.8578001
## 6 0.33578260 0.6642174
cat(predictedProbs[[2]])
## 0.6612385 0.9174083 0.4411783 0.4903603 0.8578001 0.6642174 0.7725987 0.9351605 0.8174377 0.9298628 0.8322412 0.754578 0.2713566 0.4985467 0.7759904 0.9741471 0.819931 0.637725 0.9609196 0.8944426 0.6872734 0.8574688 0.7693772 0.5545293 0.8380217 0.6880073 0.4945644 0.8997162 0.7869101 0.8475205 0.6285916 0.7474174 0.9346821 0.4220624 0.653359 0.6362846 0.737069 0.9604411 0.3895535 0.8909656 0.7728124 0.7793097 0.9336168 0.3536514 0.4742933 0.2152898 0.4224262 0.9434183 0.9377611 0.7320562 0.8977547 0.9570366 0.8594884 0.8998881 0.9378063 0.3024888 0.9549617 0.5719311 0.9237349 0.8808034 0.9166476 0.901844 0.223118 0.9568923 0.7677628 0.674256 0.2552843 0.7630487 0.4327053 0.7073402 0.5014827 0.7637778 0.6603249 0.1076062 0.8741949 0.506621 0.8620246 0.7431002 0.9218367 0.5273419 0.9418496 0.7723959 0.8514562 0.7672604 0.5504139 0.759398 0.5689542 0.9646418 0.8187491 0.907998 0.5235187 0.534559 0.720365 0.9287172 0.8404767 0.9388071 0.7005659 0.6899153 0.8929507 0.9426763 0.9232948 0.5606805 0.815046 0.6687903 0.9423604 0.6267822 0.7799241 0.9307077 0.359694 0.9284767 0.7913503 0.6961812 0.8439401 0.7099994 0.9245359 0.8658003 0.7919118 0.8569082 0.7668951 0.8899895 0.7375167 0.9033209 0.7337488 0.8708714 0.8973274 0.7170046 0.7280797 0.375912 0.7733143 0.8602702 0.9001194 0.4535604 0.9433004 0.9057258 0.8540322 0.9159102 0.9009756 0.5356577 0.3976582 0.3909891 0.9018596 0.9521543 0.968656 0.4435137 0.934002 0.737911 0.2368053 0.9426523 0.7112473 0.3514163 0.3117373 0.8926258 0.8946715 0.9579374 0.4691112 0.8521316 0.8630506 0.7802455 0.7745462 0.7908171 0.906468 0.5194479 0.48291 0.969322 0.4470405 0.9004057 0.8702441 0.4516686 0.7417544 0.692164 0.8952413 0.3476618 0.8729552 0.4613622 0.677186 0.3372772 0.9468024 0.8489456 0.4398686 0.3390773 0.8711777 0.5763659 0.9583501 0.3587548 0.7613046 0.9286051 0.9000054 0.6407399 0.5882035 0.4202011 0.8830693 0.9571663 0.8331337 0.9175973 0.919961 0.7035797 0.4202835 0.8891012 0.3814957 0.3724153
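As one further sketch, assuming the pROC package is installed (it is not loaded anywhere above), the Good-class probabilities can be turned into an ROC curve and AUC:
library(pROC)
# "Bad" is treated as the control level and "Good" as the case level
rocCurve <- roc(response = GermanCreditTest$Class,
                predictor = predictedProbs$Good,
                levels = c("Bad", "Good"))
auc(rocCurve)                        # area under the ROC curve
plot(rocCurve, legacy.axes = TRUE)   # ROC curve with 1 - specificity on the x-axis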