This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the **Knit** button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Shannon entropy, in bits, of two two-class distributions.
# Each value is -(p1 * log2(p1) + p2 * log2(p2)); the proportions are held in
# a vector so every probability is written only once.
ColorG <- local({
  p <- c(1, 2) / 3
  weighted <- p * log2(p)
  -(weighted[1] + weighted[2])
})
ColorY <- local({
  p <- c(8, 5) / 13
  weighted <- p * log2(p)
  -(weighted[1] + weighted[2])
})
ColorY
## [1] 0.9612366
ColorG
## [1] 0.9182958
library(readxl)
## Warning: package 'readxl' was built under R version 4.3.3
# Load the credit data set from an Excel workbook.
# The location defaults to the path used when this document was first written,
# but it can be overridden with the CREDIT_XLSX environment variable so the
# document can be knit on another machine without editing the code.
credit_path <- Sys.getenv(
  "CREDIT_XLSX",
  unset = "C:/Users/Harris3948/Downloads/creditS3.xlsx"
)
# Fail early with a readable message rather than a low-level reader error.
if (!file.exists(credit_path)) {
  stop("Credit workbook not found: ", credit_path, call. = FALSE)
}
creditS3 <- read_excel(credit_path)
# View() opens a data viewer window; only do that in an interactive session,
# not while knitting or running in batch mode.
if (interactive()) {
  View(creditS3)
}
# Show each column's type and its first few values.
str(creditS3)
## tibble [1,001 × 21] (S3: tbl_df/tbl/data.frame)
## $ checking_balance : chr [1:1001] "unknown" "1 - 200 DM" "< 0 DM" "< 0 DM" ...
## $ months_loan_duration: num [1:1001] 6 9 6 12 6 6 6 12 15 12 ...
## $ credit_history : chr [1:1001] "critical" "repaid" "critical" "fully repaid this bank" ...
## $ purpose : chr [1:1001] "car (new)" "car (new)" "radio/tv" "retraining" ...
## $ amount : num [1:1001] 250 276 338 339 343 362 368 385 392 409 ...
## $ savings_balance : chr [1:1001] "> 1000 DM" "< 100 DM" "501 - 1000 DM" "< 100 DM" ...
## $ employment_length : chr [1:1001] "1 - 4 yrs" "1 - 4 yrs" "> 7 yrs" "> 7 yrs" ...
## $ installment_rate : num [1:1001] 2 4 4 4 4 4 4 4 4 3 ...
## $ personal_status : chr [1:1001] "female" "married male" "single male" "married male" ...
## $ other_debtors : chr [1:1001] "none" "none" "none" "none" ...
## $ residence_history : num [1:1001] 2 4 4 1 1 4 4 3 4 3 ...
## $ property : chr [1:1001] "real estate" "real estate" "other" "other" ...
## $ age : num [1:1001] 41 22 52 45 27 52 38 58 23 42 ...
## $ installment_plan : chr [1:1001] "bank" "none" "none" "bank" ...
## $ housing : chr [1:1001] "own" "rent" "own" "own" ...
## $ existing_credits : num [1:1001] 2 1 2 1 1 2 1 4 1 2 ...
## $ default : num [1:1001] 1 1 1 1 1 1 1 1 1 1 ...
## $ dependents : num [1:1001] 1 1 1 1 1 1 1 1 1 1 ...
## $ telephone : chr [1:1001] "none" "none" "none" "none" ...
## $ foreign_worker : chr [1:1001] "yes" "yes" "yes" "yes" ...
## $ job : chr [1:1001] "unskilled resident" "unskilled resident" "skilled employee" "unskilled resident" ...
# Keep the imported `creditS3` as read; all later steps start from the copy
# `cred`.
cred <- creditS3
# Structure of the working copy (column types and leading values).
utils::str(cred)
## tibble [1,001 × 21] (S3: tbl_df/tbl/data.frame)
## $ checking_balance : chr [1:1001] "unknown" "1 - 200 DM" "< 0 DM" "< 0 DM" ...
## $ months_loan_duration: num [1:1001] 6 9 6 12 6 6 6 12 15 12 ...
## $ credit_history : chr [1:1001] "critical" "repaid" "critical" "fully repaid this bank" ...
## $ purpose : chr [1:1001] "car (new)" "car (new)" "radio/tv" "retraining" ...
## $ amount : num [1:1001] 250 276 338 339 343 362 368 385 392 409 ...
## $ savings_balance : chr [1:1001] "> 1000 DM" "< 100 DM" "501 - 1000 DM" "< 100 DM" ...
## $ employment_length : chr [1:1001] "1 - 4 yrs" "1 - 4 yrs" "> 7 yrs" "> 7 yrs" ...
## $ installment_rate : num [1:1001] 2 4 4 4 4 4 4 4 4 3 ...
## $ personal_status : chr [1:1001] "female" "married male" "single male" "married male" ...
## $ other_debtors : chr [1:1001] "none" "none" "none" "none" ...
## $ residence_history : num [1:1001] 2 4 4 1 1 4 4 3 4 3 ...
## $ property : chr [1:1001] "real estate" "real estate" "other" "other" ...
## $ age : num [1:1001] 41 22 52 45 27 52 38 58 23 42 ...
## $ installment_plan : chr [1:1001] "bank" "none" "none" "bank" ...
## $ housing : chr [1:1001] "own" "rent" "own" "own" ...
## $ existing_credits : num [1:1001] 2 1 2 1 1 2 1 4 1 2 ...
## $ default : num [1:1001] 1 1 1 1 1 1 1 1 1 1 ...
## $ dependents : num [1:1001] 1 1 1 1 1 1 1 1 1 1 ...
## $ telephone : chr [1:1001] "none" "none" "none" "none" ...
## $ foreign_worker : chr [1:1001] "yes" "yes" "yes" "yes" ...
## $ job : chr [1:1001] "unskilled resident" "unskilled resident" "skilled employee" "unskilled resident" ...
# Build `cred2` from `cred` with row 1001 removed (negative index = exclude),
# keeping every column.
cred2 <- cred[-1001, ]
# Confirm the result: structure of the reduced table.
utils::str(cred2)
## tibble [1,000 × 21] (S3: tbl_df/tbl/data.frame)
## $ checking_balance : chr [1:1000] "unknown" "1 - 200 DM" "< 0 DM" "< 0 DM" ...
## $ months_loan_duration: num [1:1000] 6 9 6 12 6 6 6 12 15 12 ...
## $ credit_history : chr [1:1000] "critical" "repaid" "critical" "fully repaid this bank" ...
## $ purpose : chr [1:1000] "car (new)" "car (new)" "radio/tv" "retraining" ...
## $ amount : num [1:1000] 250 276 338 339 343 362 368 385 392 409 ...
## $ savings_balance : chr [1:1000] "> 1000 DM" "< 100 DM" "501 - 1000 DM" "< 100 DM" ...
## $ employment_length : chr [1:1000] "1 - 4 yrs" "1 - 4 yrs" "> 7 yrs" "> 7 yrs" ...
## $ installment_rate : num [1:1000] 2 4 4 4 4 4 4 4 4 3 ...
## $ personal_status : chr [1:1000] "female" "married male" "single male" "married male" ...
## $ other_debtors : chr [1:1000] "none" "none" "none" "none" ...
## $ residence_history : num [1:1000] 2 4 4 1 1 4 4 3 4 3 ...
## $ property : chr [1:1000] "real estate" "real estate" "other" "other" ...
## $ age : num [1:1000] 41 22 52 45 27 52 38 58 23 42 ...
## $ installment_plan : chr [1:1000] "bank" "none" "none" "bank" ...
## $ housing : chr [1:1000] "own" "rent" "own" "own" ...
## $ existing_credits : num [1:1000] 2 1 2 1 1 2 1 4 1 2 ...
## $ default : num [1:1000] 1 1 1 1 1 1 1 1 1 1 ...
## $ dependents : num [1:1000] 1 1 1 1 1 1 1 1 1 1 ...
## $ telephone : chr [1:1000] "none" "none" "none" "none" ...
## $ foreign_worker : chr [1:1000] "yes" "yes" "yes" "yes" ...
## $ job : chr [1:1000] "unskilled resident" "unskilled resident" "skilled employee" "unskilled resident" ...
# Count how many records fall into each checking-balance category.
table(cred2[["checking_balance"]])
##
## < 0 DM > 200 DM 1 - 200 DM checking_balance
## 274 63 268 1
## unknown
## 394
# The same counts expressed as shares of all records.
proportions(table(cred2[["checking_balance"]]))
##
## < 0 DM > 200 DM 1 - 200 DM checking_balance
## 0.274 0.063 0.268 0.001
## unknown
## 0.394
# One record holds the column name "checking_balance" as its value (see the
# frequency table above). Replace it with the existing category label
# "1 - 200 DM" -- spelled with spaces, exactly like the other records -- so no
# extra one-off category is created. The record is located by its value rather
# than by a hard-coded row/column position, and a plain character string is
# assigned because the column is character, not factor.
bad_rows <- which(cred2$checking_balance == "checking_balance")
cred2$checking_balance[bad_rows] <- "1 - 200 DM"
cred2[bad_rows, "checking_balance"]
## # A tibble: 1 × 1
## checking_balance
## <chr>
## 1 1 - 200 DM
# Re-tabulate to confirm that only the four genuine categories remain.
table(cred2$checking_balance)
##
## < 0 DM > 200 DM 1 - 200 DM unknown
## 274 63 269 394
prop.table(table(cred2$checking_balance))
##
## < 0 DM > 200 DM 1 - 200 DM unknown
## 0.274 0.063 0.269 0.394
# Number of records in each savings-balance category.
table(cred2[["savings_balance"]])
##
## < 100 DM > 1000 DM 101 - 500 DM 501 - 1000 DM unknown
## 603 48 103 63 183
# Share of records in each savings-balance category.
proportions(table(cred2[["savings_balance"]]))
##
## < 100 DM > 1000 DM 101 - 500 DM 501 - 1000 DM unknown
## 0.603 0.048 0.103 0.063 0.183
# Five-number summary plus mean for the loan duration column.
summary(cred2[["months_loan_duration"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 4.0 12.0 18.0 20.9 24.0 72.0
# Five-number summary plus mean for the loan amount column.
summary(cred2[["amount"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 250 1366 2320 3271 3972 18424
# `default` is stored as a number; inspect it, turn it into a factor (it is
# passed as the outcome to C5.0 further down), and inspect it again.
utils::str(cred2[["default"]])
## num [1:1000] 1 1 1 1 1 1 1 1 1 1 ...
cred2[["default"]] <- factor(cred2[["default"]])
utils::str(cred2[["default"]])
## Factor w/ 2 levels "1","2": 1 1 1 1 1 1 1 1 1 1 ...
# Class balance of the outcome: counts first, then proportions.
table(cred2[["default"]])
##
## 1 2
## 700 300
proportions(table(cred2[["default"]]))
##
## 1 2
## 0.7 0.3
# Shuffle the rows reproducibly: draw one uniform random key per row under a
# fixed seed and reorder the table by those keys.
# The number of keys is taken from the data instead of a hard-coded 1000, so
# the shuffle still covers every row exactly once if the row count changes;
# for the current 1000-row table the resulting order is unchanged.
set.seed(12345)
cred_rand <- cred2[order(runif(nrow(cred2))), ]
# Peek at the first rows of the shuffled table.
head(cred_rand)
## # A tibble: 6 × 21
## checking_balance months_loan_duration credit_history purpose amount
## <chr> <dbl> <chr> <chr> <dbl>
## 1 > 200 DM 6 fully repaid this bank education 433
## 2 unknown 6 critical car (new) 2080
## 3 < 0 DM 12 critical radio/tv 3573
## 4 1 - 200 DM 12 repaid car (new) 640
## 5 unknown 24 repaid business 1258
## 6 unknown 18 repaid radio/tv 866
## # ℹ 16 more variables: savings_balance <chr>, employment_length <chr>,
## # installment_rate <dbl>, personal_status <chr>, other_debtors <chr>,
## # residence_history <dbl>, property <chr>, age <dbl>, installment_plan <chr>,
## # housing <chr>, existing_credits <dbl>, default <fct>, dependents <dbl>,
## # telephone <chr>, foreign_worker <chr>, job <chr>
# Split the shuffled table: the first 900 rows form the training set and
# rows 901 through 1000 form the test set.
cred_train <- cred_rand[seq_len(900), ]
cred_test <- cred_rand[seq(901, 1000), ]
# Compare the outcome proportions in the two subsets.
proportions(table(cred_train[["default"]]))
##
## 1 2
## 0.6966667 0.3033333
proportions(table(cred_test[["default"]]))
##
## 1 2
## 0.73 0.27
library(C50)
## Warning: package 'C50' was built under R version 4.3.3
# Fit a single C5.0 decision tree (trials = 1, no cost matrix) that predicts
# `default` from the remaining columns.
# The outcome column is removed from the predictors: passing the whole of
# `cred_train` let the model split on `default` itself, which is consistent
# with the earlier fit reporting 21 predictors and a tree of size 2.
# NOTE: the model printout recorded below predates this fix; re-knit the
# document to refresh it.
predictor_cols <- setdiff(names(cred_train), "default")
m <- C5.0(cred_train[, predictor_cols], cred_train$default,
          trials = 1, costs = NULL)
m
##
## Call:
## C5.0.default(x = cred_train, y = cred_train$default, trials = 1, costs = NULL)
##
## Classification Tree
## Number of samples: 900
## Number of predictors: 21
##
## Tree size: 2
##
## Non-standard options: attempt to group attributes
You can also embed plots, for example:
Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.