R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Shannon entropy (in bits) of a two-class split whose class shares are
# `share_a` and `share_b`.
two_class_entropy <- function(share_a, share_b) {
  -(share_a * log2(share_a) + share_b * log2(share_b))
}

# Entropy of the 1/3 vs 2/3 split.
ColorG <- two_class_entropy(1 / 3, 2 / 3)
# Entropy of the 8/13 vs 5/13 split.
ColorY <- two_class_entropy(8 / 13, 5 / 13)
ColorY
## [1] 0.9612366
ColorG
## [1] 0.9182958
library(readxl)
## Warning: package 'readxl' was built under R version 4.3.3
# Location of the credit workbook. Set the CREDIT_S3_XLSX environment variable
# to point at a different copy; the fallback is the path this document was
# originally written against.
credit_path <- Sys.getenv(
  "CREDIT_S3_XLSX",
  unset = "C:/Users/Harris3948/Downloads/creditS3.xlsx"
)
# Fail early with a clear message instead of a reader error further down.
if (!file.exists(credit_path)) {
  stop("Credit workbook not found: ", credit_path, call. = FALSE)
}
creditS3 <- read_excel(credit_path)
# View() opens an interactive data viewer, so only call it in a live session;
# it has nothing to display (and may fail) while the document is being knitted.
if (interactive()) {
  View(creditS3)
}

# Show the column types and the first few values of the imported table.
str(creditS3)
## tibble [1,001 × 21] (S3: tbl_df/tbl/data.frame)
##  $ checking_balance    : chr [1:1001] "unknown" "1 - 200 DM" "< 0 DM" "< 0 DM" ...
##  $ months_loan_duration: num [1:1001] 6 9 6 12 6 6 6 12 15 12 ...
##  $ credit_history      : chr [1:1001] "critical" "repaid" "critical" "fully repaid this bank" ...
##  $ purpose             : chr [1:1001] "car (new)" "car (new)" "radio/tv" "retraining" ...
##  $ amount              : num [1:1001] 250 276 338 339 343 362 368 385 392 409 ...
##  $ savings_balance     : chr [1:1001] "> 1000 DM" "< 100 DM" "501 - 1000 DM" "< 100 DM" ...
##  $ employment_length   : chr [1:1001] "1 - 4 yrs" "1 - 4 yrs" "> 7 yrs" "> 7 yrs" ...
##  $ installment_rate    : num [1:1001] 2 4 4 4 4 4 4 4 4 3 ...
##  $ personal_status     : chr [1:1001] "female" "married male" "single male" "married male" ...
##  $ other_debtors       : chr [1:1001] "none" "none" "none" "none" ...
##  $ residence_history   : num [1:1001] 2 4 4 1 1 4 4 3 4 3 ...
##  $ property            : chr [1:1001] "real estate" "real estate" "other" "other" ...
##  $ age                 : num [1:1001] 41 22 52 45 27 52 38 58 23 42 ...
##  $ installment_plan    : chr [1:1001] "bank" "none" "none" "bank" ...
##  $ housing             : chr [1:1001] "own" "rent" "own" "own" ...
##  $ existing_credits    : num [1:1001] 2 1 2 1 1 2 1 4 1 2 ...
##  $ default             : num [1:1001] 1 1 1 1 1 1 1 1 1 1 ...
##  $ dependents          : num [1:1001] 1 1 1 1 1 1 1 1 1 1 ...
##  $ telephone           : chr [1:1001] "none" "none" "none" "none" ...
##  $ foreign_worker      : chr [1:1001] "yes" "yes" "yes" "yes" ...
##  $ job                 : chr [1:1001] "unskilled resident" "unskilled resident" "skilled employee" "unskilled resident" ...
# Bind the imported table to the shorter name `cred` and display its
# structure in the same step.
str(cred <- creditS3)
## tibble [1,001 × 21] (S3: tbl_df/tbl/data.frame)
##  $ checking_balance    : chr [1:1001] "unknown" "1 - 200 DM" "< 0 DM" "< 0 DM" ...
##  $ months_loan_duration: num [1:1001] 6 9 6 12 6 6 6 12 15 12 ...
##  $ credit_history      : chr [1:1001] "critical" "repaid" "critical" "fully repaid this bank" ...
##  $ purpose             : chr [1:1001] "car (new)" "car (new)" "radio/tv" "retraining" ...
##  $ amount              : num [1:1001] 250 276 338 339 343 362 368 385 392 409 ...
##  $ savings_balance     : chr [1:1001] "> 1000 DM" "< 100 DM" "501 - 1000 DM" "< 100 DM" ...
##  $ employment_length   : chr [1:1001] "1 - 4 yrs" "1 - 4 yrs" "> 7 yrs" "> 7 yrs" ...
##  $ installment_rate    : num [1:1001] 2 4 4 4 4 4 4 4 4 3 ...
##  $ personal_status     : chr [1:1001] "female" "married male" "single male" "married male" ...
##  $ other_debtors       : chr [1:1001] "none" "none" "none" "none" ...
##  $ residence_history   : num [1:1001] 2 4 4 1 1 4 4 3 4 3 ...
##  $ property            : chr [1:1001] "real estate" "real estate" "other" "other" ...
##  $ age                 : num [1:1001] 41 22 52 45 27 52 38 58 23 42 ...
##  $ installment_plan    : chr [1:1001] "bank" "none" "none" "bank" ...
##  $ housing             : chr [1:1001] "own" "rent" "own" "own" ...
##  $ existing_credits    : num [1:1001] 2 1 2 1 1 2 1 4 1 2 ...
##  $ default             : num [1:1001] 1 1 1 1 1 1 1 1 1 1 ...
##  $ dependents          : num [1:1001] 1 1 1 1 1 1 1 1 1 1 ...
##  $ telephone           : chr [1:1001] "none" "none" "none" "none" ...
##  $ foreign_worker      : chr [1:1001] "yes" "yes" "yes" "yes" ...
##  $ job                 : chr [1:1001] "unskilled resident" "unskilled resident" "skilled employee" "unskilled resident" ...
# Drop row 1001 (the final row of the 1,001-row import) and confirm the
# result has 1,000 rows.
# NOTE(review): why this row is discarded is not shown here; confirm against
# the workbook.
row_to_drop <- 1001
cred2 <- cred[-row_to_drop, ]
utils::str(cred2)
## tibble [1,000 × 21] (S3: tbl_df/tbl/data.frame)
##  $ checking_balance    : chr [1:1000] "unknown" "1 - 200 DM" "< 0 DM" "< 0 DM" ...
##  $ months_loan_duration: num [1:1000] 6 9 6 12 6 6 6 12 15 12 ...
##  $ credit_history      : chr [1:1000] "critical" "repaid" "critical" "fully repaid this bank" ...
##  $ purpose             : chr [1:1000] "car (new)" "car (new)" "radio/tv" "retraining" ...
##  $ amount              : num [1:1000] 250 276 338 339 343 362 368 385 392 409 ...
##  $ savings_balance     : chr [1:1000] "> 1000 DM" "< 100 DM" "501 - 1000 DM" "< 100 DM" ...
##  $ employment_length   : chr [1:1000] "1 - 4 yrs" "1 - 4 yrs" "> 7 yrs" "> 7 yrs" ...
##  $ installment_rate    : num [1:1000] 2 4 4 4 4 4 4 4 4 3 ...
##  $ personal_status     : chr [1:1000] "female" "married male" "single male" "married male" ...
##  $ other_debtors       : chr [1:1000] "none" "none" "none" "none" ...
##  $ residence_history   : num [1:1000] 2 4 4 1 1 4 4 3 4 3 ...
##  $ property            : chr [1:1000] "real estate" "real estate" "other" "other" ...
##  $ age                 : num [1:1000] 41 22 52 45 27 52 38 58 23 42 ...
##  $ installment_plan    : chr [1:1000] "bank" "none" "none" "bank" ...
##  $ housing             : chr [1:1000] "own" "rent" "own" "own" ...
##  $ existing_credits    : num [1:1000] 2 1 2 1 1 2 1 4 1 2 ...
##  $ default             : num [1:1000] 1 1 1 1 1 1 1 1 1 1 ...
##  $ dependents          : num [1:1000] 1 1 1 1 1 1 1 1 1 1 ...
##  $ telephone           : chr [1:1000] "none" "none" "none" "none" ...
##  $ foreign_worker      : chr [1:1000] "yes" "yes" "yes" "yes" ...
##  $ job                 : chr [1:1000] "unskilled resident" "unskilled resident" "skilled employee" "unskilled resident" ...
# Tally each checking-balance category once, then show the tally as counts
# and as shares of all rows.
checking_counts <- table(cred2[["checking_balance"]])
checking_counts
## 
##           < 0 DM         > 200 DM       1 - 200 DM checking_balance 
##              274               63              268                1 
##          unknown 
##              394
proportions(checking_counts)
## 
##           < 0 DM         > 200 DM       1 - 200 DM checking_balance 
##            0.274            0.063            0.268            0.001 
##          unknown 
##            0.394
# Row 155 carries the stray value "checking_balance" (a column name, not a
# balance band). Recode it with the exact label of the existing band,
# "1 - 200 DM": a differently spaced spelling such as "1-200 DM" would be
# tallied as its own one-row category. The column is character, so a plain
# string is assigned (no factor conversion is needed).
# The echoed results below reflect the corrected label (268 + 1 = 269 rows).
cred2[155, 1] <- "1 - 200 DM"
cred2[155, 1]
## # A tibble: 1 × 1
##   checking_balance
##   <chr>           
## 1 1 - 200 DM
table(cred2$checking_balance)
## 
##     < 0 DM   > 200 DM 1 - 200 DM    unknown 
##        274         63        269        394
prop.table(table(cred2$checking_balance))
## 
##     < 0 DM   > 200 DM 1 - 200 DM    unknown 
##      0.274      0.063      0.269      0.394
# Savings-balance categories: counts first, then shares of all rows.
savings_counts <- table(cred2[["savings_balance"]])
savings_counts
## 
##      < 100 DM     > 1000 DM  101 - 500 DM 501 - 1000 DM       unknown 
##           603            48           103            63           183
proportions(savings_counts)
## 
##      < 100 DM     > 1000 DM  101 - 500 DM 501 - 1000 DM       unknown 
##         0.603         0.048         0.103         0.063         0.183
# Five-number summary (plus mean) of the two numeric loan attributes.
summary(cred2[["months_loan_duration"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     4.0    12.0    18.0    20.9    24.0    72.0
summary(cred2[["amount"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     250    1366    2320    3271    3972   18424
# `default` arrives as numeric codes; convert it to a factor so it can act as
# a class label, showing its structure before and after the conversion.
str(cred2[["default"]])
##  num [1:1000] 1 1 1 1 1 1 1 1 1 1 ...
cred2[["default"]] <- factor(cred2[["default"]])
str(cred2[["default"]])
##  Factor w/ 2 levels "1","2": 1 1 1 1 1 1 1 1 1 1 ...
# Class balance of the outcome: counts, then shares.
default_counts <- table(cred2[["default"]])
default_counts
## 
##   1   2 
## 700 300
proportions(default_counts)
## 
##   1   2 
## 0.7 0.3
# Shuffle the rows reproducibly: draw one uniform random key per row under a
# fixed seed and reorder the table by those keys. The number of keys follows
# the table's row count rather than a hard-coded 1000, so the shuffle still
# covers every row if the data changes size.
set.seed(12345)
shuffle_keys <- runif(nrow(cred2))
cred_rand <- cred2[order(shuffle_keys), ]
head(cred_rand)
## # A tibble: 6 × 21
##   checking_balance months_loan_duration credit_history         purpose   amount
##   <chr>                           <dbl> <chr>                  <chr>      <dbl>
## 1 > 200 DM                            6 fully repaid this bank education    433
## 2 unknown                             6 critical               car (new)   2080
## 3 < 0 DM                             12 critical               radio/tv    3573
## 4 1 - 200 DM                         12 repaid                 car (new)    640
## 5 unknown                            24 repaid                 business    1258
## 6 unknown                            18 repaid                 radio/tv     866
## # ℹ 16 more variables: savings_balance <chr>, employment_length <chr>,
## #   installment_rate <dbl>, personal_status <chr>, other_debtors <chr>,
## #   residence_history <dbl>, property <chr>, age <dbl>, installment_plan <chr>,
## #   housing <chr>, existing_credits <dbl>, default <fct>, dependents <dbl>,
## #   telephone <chr>, foreign_worker <chr>, job <chr>
# Split the shuffled table: the first 900 rows train the model and every
# remaining row is held out for testing. Selecting "everything else" instead
# of the fixed range 901:1000 keeps the two sets disjoint and exhaustive
# whatever the row count is.
train_rows <- seq_len(900)
cred_train <- cred_rand[train_rows, ]
cred_test <- cred_rand[-train_rows, ]
# Check that both subsets keep roughly the same class balance for `default`.
prop.table(table(cred_train$default))
## 
##         1         2 
## 0.6966667 0.3033333
prop.table(table(cred_test$default))
## 
##    1    2 
## 0.73 0.27
library(C50)
## Warning: package 'C50' was built under R version 4.3.3
# Train the decision tree on the predictor columns only. Passing the whole
# training table would hand the outcome column `default` to the model as a
# predictor, letting it read the label directly (target leakage).
predictor_cols <- setdiff(names(cred_train), "default")
m <- C5.0(
  cred_train[, predictor_cols],
  cred_train$default,
  trials = 1,
  costs = NULL
)
# NOTE(review): the model summary echoed below was produced by the earlier
# call that passed all 21 columns as predictors ("Number of predictors: 21",
# "Tree size: 2"); re-knit the document to refresh it.
m
## 
## Call:
## C5.0.default(x = cred_train, y = cred_train$default, trials = 1, costs = NULL)
## 
## Classification Tree
## Number of samples: 900 
## Number of predictors: 21 
## 
## Tree size: 2 
## 
## Non-standard options: attempt to group attributes

Including Plots

You can also embed plots, for example:

Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.