# RWekaADT_Classification_GermanCreditData 
# Testing the Effectiveness of - Alternating Decision Trees [ADT]
library(RWeka)
library("RWeka", lib.loc="~/R/win-library/3.1")
#
# library("RCurl", lib.loc="~/R/win-library/3.1")
# #
# # We shall use "gc" seen below -- 
# gc_url<-getURL("https://archive.ics.uci.edu/ml/machine-learning-databases/statlog/german/german.data")
# gc<-read.table(textConnection(gc_url),header =F)
# #
# names(gc) <- c("check_Acc_Status" , "Duration_Months" , "Credit_history" , "Credit_purpose" ,       
#                "amount" , "savings" , "employ.since" , "installment.rate" , "status.sex" ,        
#                "cosigners" , "residence.since", "collateral" , "age" , "otherplans" ,   
#                "housing" , "existing.credits" , "job" , "no.dependents" , "telephone",           
#                "foreign", "default" )
# #
# summary(gc);str(gc);head(gc,10);tail(gc,10)
# 
# First, test RWeka with the iris data.
# Fit a J48 decision tree that predicts Species from every other column of
# the built-in iris data set.
iris_j48 <- J48(Species ~ ., data = iris)
# Print explicitly: a bare object name is only auto-printed at the interactive
# prompt / under Rscript, not when this file is run through source() with its
# default settings.
print(iris_j48)
## J48 pruned tree
## ------------------
## 
## Petal.Width <= 0.6: setosa (50.0)
## Petal.Width > 0.6
## |   Petal.Width <= 1.7
## |   |   Petal.Length <= 4.9: versicolor (48.0/1.0)
## |   |   Petal.Length > 4.9
## |   |   |   Petal.Width <= 1.5: virginica (3.0)
## |   |   |   Petal.Width > 1.5: versicolor (3.0/1.0)
## |   Petal.Width > 1.7: virginica (46.0/1.0)
## 
## Number of Leaves  :  5
## 
## Size of the tree :   9
# Performance statistics and confusion matrix for the iris tree.
# NOTE(review): the pasted output below reports 150 instances, i.e. the same
# rows the model was fitted on, so these look like training-set (optimistic)
# figures -- confirm before quoting them as accuracy estimates.
# Explicit print() so the report is also shown when the file is source()'d.
print(summary(iris_j48))
## 
## === Summary ===
## 
## Correctly Classified Instances         147               98      %
## Incorrectly Classified Instances         3                2      %
## Kappa statistic                          0.97  
## Mean absolute error                      0.0233
## Root mean squared error                  0.108 
## Relative absolute error                  5.2482 %
## Root relative squared error             22.9089 %
## Coverage of cases (0.95 level)          98.6667 %
## Mean rel. region size (0.95 level)      34      %
## Total Number of Instances              150     
## 
## === Confusion Matrix ===
## 
##   a  b  c   <-- classified as
##  50  0  0 |  a = setosa
##   0 49  1 |  b = versicolor
##   0  2 48 |  c = virginica
# plot(iris_j48) 
# Attach partykit, which supplies as.party() and the plot method used below.
library("partykit")
## Loading required package: grid
# Convert the fitted J48 model to a party object and draw it.
plot(x = as.party(obj = iris_j48))

#
#
# Location of the pre-processed German credit data (all columns categorical).
# NOTE: this is a machine-specific absolute path; adjust it for your setup.
gcf_path <- "C:/STAT/_Own_R/Credit/Credit-1/gcf.csv"
if (!file.exists(gcf_path)) {
  stop("German credit file not found: ", gcf_path, call. = FALSE)
}
# stringsAsFactors = TRUE is required on R >= 4.0, where read.csv() no longer
# converts text columns to factors by default. The str() output pasted below
# shows all 18 columns as factors, and the J48 fit on `default` later in the
# script depends on that.
gcf <- read.csv(gcf_path, stringsAsFactors = TRUE)
# Show the column types and the first few values of each column.
str(gcf)
## 'data.frame':    1000 obs. of  18 variables:
##  $ check_Acc_Status: Factor w/ 4 levels "A11","A12","A13",..: 1 2 4 1 1 4 4 2 4 2 ...
##  $ Credit_history  : Factor w/ 5 levels "A30","A31","A32",..: 5 3 5 3 4 3 3 3 3 5 ...
##  $ Credit_purpose  : Factor w/ 10 levels "A40","A41","A410",..: 5 5 8 4 1 8 4 2 5 1 ...
##  $ savings         : Factor w/ 5 levels "A61","A62","A63",..: 5 1 1 1 1 5 3 1 4 1 ...
##  $ employ.since    : Factor w/ 5 levels "A71","A72","A73",..: 5 3 4 4 3 3 5 3 4 1 ...
##  $ installment.rate: Factor w/ 4 levels "IR1","IR2","IR3",..: 4 2 2 2 3 2 3 2 2 4 ...
##  $ status.sex      : Factor w/ 4 levels "A91","A92","A93",..: 3 2 3 3 3 3 3 3 1 4 ...
##  $ cosigners       : Factor w/ 3 levels "A101","A102",..: 1 1 1 3 1 1 1 1 1 1 ...
##  $ residence.since : Factor w/ 4 levels "RS1","RS2","RS3",..: 4 2 3 4 4 4 4 2 4 2 ...
##  $ collateral      : Factor w/ 4 levels "A121","A122",..: 1 1 1 2 4 4 2 3 1 3 ...
##  $ otherplans      : Factor w/ 3 levels "A141","A142",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ housing         : Factor w/ 3 levels "A151","A152",..: 2 2 2 3 3 3 2 1 2 2 ...
##  $ existing.credits: Factor w/ 4 levels "EC1","EC2","EC3",..: 2 1 1 1 2 1 1 1 1 2 ...
##  $ job             : Factor w/ 4 levels "A171","A172",..: 3 3 2 3 3 2 3 4 2 4 ...
##  $ no.dependents   : Factor w/ 2 levels "ND1","ND2": 1 1 2 2 2 2 1 1 1 1 ...
##  $ telephone       : Factor w/ 2 levels "A191","A192": 2 1 1 1 1 2 1 2 1 1 ...
##  $ foreign         : Factor w/ 2 levels "A201","A202": 1 1 1 1 1 1 1 1 1 1 ...
##  $ default         : Factor w/ 2 levels "D1","D2": 1 2 1 1 2 1 1 1 1 2 ...
# Fit a J48 decision tree that predicts `default` from every other column of
# the German credit data frame.
gcf_j48 <- J48(default ~ ., data = gcf)
# Print explicitly: a bare object name is only auto-printed at the interactive
# prompt / under Rscript, not when this file is run through source() with its
# default settings.
print(gcf_j48)
## J48 pruned tree
## ------------------
## 
## check_Acc_Status = A11
## |   foreign = A201
## |   |   Credit_history = A30: D2 (13.0/3.0)
## |   |   Credit_history = A31: D2 (21.0/5.0)
## |   |   Credit_history = A32
## |   |   |   cosigners = A101
## |   |   |   |   savings = A61: D2 (103.0/44.0)
## |   |   |   |   savings = A62
## |   |   |   |   |   telephone = A191: D2 (4.0)
## |   |   |   |   |   telephone = A192: D1 (4.0/1.0)
## |   |   |   |   savings = A63: D1 (3.0)
## |   |   |   |   savings = A64: D1 (4.0)
## |   |   |   |   savings = A65
## |   |   |   |   |   job = A171: D2 (0.0)
## |   |   |   |   |   job = A172: D1 (2.0)
## |   |   |   |   |   job = A173
## |   |   |   |   |   |   telephone = A191: D2 (10.0/1.0)
## |   |   |   |   |   |   telephone = A192: D1 (4.0/1.0)
## |   |   |   |   |   job = A174: D2 (3.0/1.0)
## |   |   |   cosigners = A102: D2 (7.0/3.0)
## |   |   |   cosigners = A103: D1 (11.0/1.0)
## |   |   Credit_history = A33: D2 (12.0/3.0)
## |   |   Credit_history = A34: D1 (58.0/17.0)
## |   foreign = A202: D1 (15.0/2.0)
## check_Acc_Status = A12
## |   cosigners = A101
## |   |   savings = A61
## |   |   |   collateral = A121
## |   |   |   |   no.dependents = ND1
## |   |   |   |   |   existing.credits = EC1: D1 (18.0/4.0)
## |   |   |   |   |   existing.credits = EC2: D2 (7.0/1.0)
## |   |   |   |   |   existing.credits = EC3: D1 (2.0/1.0)
## |   |   |   |   |   existing.credits = EC4: D1 (0.0)
## |   |   |   |   no.dependents = ND2: D1 (4.0)
## |   |   |   collateral = A122: D1 (27.0/11.0)
## |   |   |   collateral = A123
## |   |   |   |   existing.credits = EC1
## |   |   |   |   |   housing = A151: D1 (7.0/2.0)
## |   |   |   |   |   housing = A152: D2 (25.0/7.0)
## |   |   |   |   |   housing = A153: D2 (0.0)
## |   |   |   |   existing.credits = EC2
## |   |   |   |   |   status.sex = A91: D2 (1.0)
## |   |   |   |   |   status.sex = A92
## |   |   |   |   |   |   job = A171: D1 (0.0)
## |   |   |   |   |   |   job = A172: D1 (0.0)
## |   |   |   |   |   |   job = A173: D1 (2.0)
## |   |   |   |   |   |   job = A174: D2 (2.0)
## |   |   |   |   |   status.sex = A93: D1 (8.0)
## |   |   |   |   |   status.sex = A94: D2 (2.0)
## |   |   |   |   existing.credits = EC3: D1 (3.0)
## |   |   |   |   existing.credits = EC4: D1 (0.0)
## |   |   |   collateral = A124: D2 (20.0/5.0)
## |   |   savings = A62
## |   |   |   Credit_history = A30: D2 (4.0/1.0)
## |   |   |   Credit_history = A31: D2 (6.0/1.0)
## |   |   |   Credit_history = A32
## |   |   |   |   collateral = A121: D1 (3.0)
## |   |   |   |   collateral = A122: D2 (3.0)
## |   |   |   |   collateral = A123
## |   |   |   |   |   housing = A151: D2 (4.0)
## |   |   |   |   |   housing = A152: D1 (5.0/1.0)
## |   |   |   |   |   housing = A153: D2 (0.0)
## |   |   |   |   collateral = A124: D1 (2.0/1.0)
## |   |   |   Credit_history = A33: D1 (10.0/2.0)
## |   |   |   Credit_history = A34: D1 (5.0/1.0)
## |   |   savings = A63: D1 (11.0/3.0)
## |   |   savings = A64: D1 (13.0/3.0)
## |   |   savings = A65: D1 (42.0/7.0)
## |   cosigners = A102
## |   |   collateral = A121: D1 (2.0)
## |   |   collateral = A122: D2 (0.0)
## |   |   collateral = A123: D2 (3.0)
## |   |   collateral = A124: D2 (4.0/1.0)
## |   cosigners = A103
## |   |   Credit_purpose = A40: D2 (2.0)
## |   |   Credit_purpose = A41: D1 (0.0)
## |   |   Credit_purpose = A410: D1 (0.0)
## |   |   Credit_purpose = A42: D1 (0.0)
## |   |   Credit_purpose = A43: D1 (21.0/2.0)
## |   |   Credit_purpose = A44: D1 (0.0)
## |   |   Credit_purpose = A45: D1 (1.0)
## |   |   Credit_purpose = A46: D1 (0.0)
## |   |   Credit_purpose = A48: D1 (0.0)
## |   |   Credit_purpose = A49: D1 (0.0)
## check_Acc_Status = A13: D1 (63.0/14.0)
## check_Acc_Status = A14: D1 (394.0/46.0)
## 
## Number of Leaves  :  65
## 
## Size of the tree :   87
# Performance statistics and confusion matrix for the credit tree.
# NOTE(review): the pasted output below reports 1000 instances, i.e. the same
# rows the model was fitted on, so these look like training-set (optimistic)
# figures -- confirm before quoting them as accuracy estimates.
# Explicit print() so the report is also shown when the file is source()'d.
print(summary(gcf_j48))
## 
## === Summary ===
## 
## Correctly Classified Instances         804               80.4    %
## Incorrectly Classified Instances       196               19.6    %
## Kappa statistic                          0.5129
## Mean absolute error                      0.2878
## Root mean squared error                  0.3793
## Relative absolute error                 68.4983 %
## Root relative squared error             82.7794 %
## Coverage of cases (0.95 level)         100      %
## Mean rel. region size (0.95 level)      97.35   %
## Total Number of Instances             1000     
## 
## === Confusion Matrix ===
## 
##    a   b   <-- classified as
##  624  76 |   a = D1
##  120 180 |   b = D2
# plot(gcf_j48)  # direct plot call, left disabled in favour of partykit below
# partykit is attached again here so this section can be run on its own.
library(partykit)
# Convert the fitted credit tree with as.party() and draw it via partykit.
# NOTE(review): the printed model reports 65 leaves, so the figure may need a
# large graphics device to be readable -- confirm.
plot(as.party(gcf_j48)) # we use the partykit-package for nice plotting.

#
# ?J48
# evaluate_Weka_classifier
# ?evaluate_Weka_classifier
# ## Use some example data.
# w <- read.arff(system.file("arff","weather.nominal.arff", 
#                            package = "RWeka"))