#=======================================================================
#======================================================================= # Rattle timestamp: 2020-09-05 23:04:09 x86_64-w64-mingw32
# Session setup for this Rattle log.

# Access the weather dataset and utilities.
library(rattle)
# Utilise %>% and %<>% pipeline operators. This call used to sit inside the
# trailing comment of the line above, so magrittr was never attached.
library(magrittr)

# Mode flags: `scoring` is always the logical negation of `building`.
building <- TRUE
scoring <- !building

# Seed reused by the set.seed(crv$seed) calls further down so that results
# are repeatable.
# NOTE(review): `crv` is not created in this file; presumably it is the
# environment exported by rattle -- confirm.
crv$seed <- 42
#======================================================================= # Rattle timestamp: 2020-09-05 23:04:39 x86_64-w64-mingw32
# Load the dataset from a CSV file into crs$dataset.
# The string literals below were delimited by typographic quotes, which R
# does not accept; they are restored to plain ASCII double quotes.
# NOTE(review): the path is an absolute file:// URL on one user's desktop, so
# the script only runs on that machine -- consider parameterising it.
fname <- "file:///C:/Users/prana/Desktop/L6-diabetes-data-for-R-_csv_-_2__sample.csv"

# Cells equal to ".", "NA", "" or "?" are read as missing values, and
# leading/trailing white space is stripped from unquoted character fields.
crs$dataset <- read.csv(
  fname,
  na.strings = c(".", "NA", "", "?"),
  strip.white = TRUE,
  encoding = "UTF-8"
)
#======================================================================= # Rattle timestamp: 2020-09-05 23:04:40 x86_64-w64-mingw32
# Build the random training/validation/testing partition and record the
# variable roles. The `$` accessors and string quotes in this section had
# been mangled into `\(`, `\)` and typographic quotes; they are restored so
# that the code parses.

# Reset the random number seed so the partition is repeatable.
set.seed(crv$seed)

crs$nobs <- nrow(crs$dataset)

# Draw 70% of the row indices for training.
crs$train <- sample(crs$nobs, 0.7 * crs$nobs)

# Draw 15% of the row count from the rows not used for training.
crs$validate <- sample(
  setdiff(seq_len(crs$nobs), crs$train),
  0.15 * crs$nobs
)

# Every row that is in neither the training nor the validation set.
crs$test <- setdiff(
  setdiff(seq_len(crs$nobs), crs$train),
  crs$validate
)

# Variable roles.
# NOTE(review): "X" and "id" look like row identifiers, and "dm", "TNM_dm"
# and "IMO_dm" look like variants of the target "TNM_IMO_dm". If so, using
# them as inputs leaks the label into the model -- confirm against the data.
crs$input <- c(
  "X", "id", "chol", "stab.glu", "hdl", "ratio", "glyhb", "location",
  "age", "gender", "height", "weight", "frame", "bp.1s", "bp.1d",
  "bp.2s", "bp.2d", "waist", "hip", "time.ppn", "insurance", "fh",
  "smoking", "dm", "TNM_dm", "IMN_hdl", "IMN_ratio", "IMN_glyhb",
  "IMN_height", "IMN_weight", "IMN_bp.1s", "IMN_bp.1d", "IMN_bp.2s",
  "IMN_bp.2d", "IMN_waist", "IMN_hip", "IMN_time.ppn", "IMO_dm"
)

crs$numeric <- c(
  "X", "id", "chol", "stab.glu", "hdl", "ratio", "glyhb", "age",
  "height", "weight", "bp.1s", "bp.1d", "bp.2s", "bp.2d", "waist",
  "hip", "time.ppn", "insurance", "fh", "smoking", "TNM_dm", "IMN_hdl",
  "IMN_ratio", "IMN_glyhb", "IMN_height", "IMN_weight", "IMN_bp.1s",
  "IMN_bp.1d", "IMN_bp.2s", "IMN_bp.2d", "IMN_waist", "IMN_hip",
  "IMN_time.ppn"
)

crs$categoric <- c("location", "gender", "frame", "dm", "IMO_dm")

crs$target <- "TNM_IMO_dm"
crs$risk <- NULL
crs$ident <- NULL
crs$ignore <- NULL
crs$weights <- NULL
#======================================================================= # Rattle timestamp: 2020-09-05 23:05:43 x86_64-w64-mingw32
# Re-partition the data with seed 1 and no validation set, replacing the
# partition created earlier in the script. The `$` accessors and string
# quotes in this section had been mangled; they are restored so that the
# code parses.

set.seed(1)

crs$nobs <- nrow(crs$dataset)

# Draw 70% of the row indices for training.
crs$train <- sample(crs$nobs, 0.7 * crs$nobs)

# No validation set in this partition.
crs$validate <- NULL

# With crs$validate set to NULL the second setdiff() removes nothing, so
# every non-training row ends up in the test set.
crs$test <- setdiff(
  setdiff(seq_len(crs$nobs), crs$train),
  crs$validate
)

# Variable roles.
# NOTE(review): "X" and "id" look like row identifiers, and "dm", "TNM_dm"
# and "IMO_dm" look like variants of the target "TNM_IMO_dm". If so, using
# them as inputs leaks the label into the model -- confirm against the data.
crs$input <- c(
  "X", "id", "chol", "stab.glu", "hdl", "ratio", "glyhb", "location",
  "age", "gender", "height", "weight", "frame", "bp.1s", "bp.1d",
  "bp.2s", "bp.2d", "waist", "hip", "time.ppn", "insurance", "fh",
  "smoking", "dm", "TNM_dm", "IMN_hdl", "IMN_ratio", "IMN_glyhb",
  "IMN_height", "IMN_weight", "IMN_bp.1s", "IMN_bp.1d", "IMN_bp.2s",
  "IMN_bp.2d", "IMN_waist", "IMN_hip", "IMN_time.ppn", "IMO_dm"
)

crs$numeric <- c(
  "X", "id", "chol", "stab.glu", "hdl", "ratio", "glyhb", "age",
  "height", "weight", "bp.1s", "bp.1d", "bp.2s", "bp.2d", "waist",
  "hip", "time.ppn", "insurance", "fh", "smoking", "TNM_dm", "IMN_hdl",
  "IMN_ratio", "IMN_glyhb", "IMN_height", "IMN_weight", "IMN_bp.1s",
  "IMN_bp.1d", "IMN_bp.2s", "IMN_bp.2d", "IMN_waist", "IMN_hip",
  "IMN_time.ppn"
)

crs$categoric <- c("location", "gender", "frame", "dm", "IMO_dm")

crs$target <- "TNM_IMO_dm"
crs$risk <- NULL
crs$ident <- NULL
crs$ignore <- NULL
crs$weights <- NULL
#======================================================================= # Rattle timestamp: 2020-09-05 23:05:50 x86_64-w64-mingw32
# Partition the data with seed 1: 70% training, no validation set, and the
# remaining rows for testing. The `$` accessors and string quotes in this
# section had been mangled; they are restored so that the code parses.
# NOTE(review): this step uses the same seed and the same calls as the
# partition logged immediately before it, so it reproduces that partition;
# it is kept because it is a separate logged step.

set.seed(1)

crs$nobs <- nrow(crs$dataset)

# Draw 70% of the row indices for training.
crs$train <- sample(crs$nobs, 0.7 * crs$nobs)

# No validation set in this partition.
crs$validate <- NULL

# With crs$validate set to NULL the second setdiff() removes nothing, so
# every non-training row ends up in the test set.
crs$test <- setdiff(
  setdiff(seq_len(crs$nobs), crs$train),
  crs$validate
)

# Variable roles.
# NOTE(review): "X" and "id" look like row identifiers, and "dm", "TNM_dm"
# and "IMO_dm" look like variants of the target "TNM_IMO_dm". If so, using
# them as inputs leaks the label into the model -- confirm against the data.
crs$input <- c(
  "X", "id", "chol", "stab.glu", "hdl", "ratio", "glyhb", "location",
  "age", "gender", "height", "weight", "frame", "bp.1s", "bp.1d",
  "bp.2s", "bp.2d", "waist", "hip", "time.ppn", "insurance", "fh",
  "smoking", "dm", "TNM_dm", "IMN_hdl", "IMN_ratio", "IMN_glyhb",
  "IMN_height", "IMN_weight", "IMN_bp.1s", "IMN_bp.1d", "IMN_bp.2s",
  "IMN_bp.2d", "IMN_waist", "IMN_hip", "IMN_time.ppn", "IMO_dm"
)

crs$numeric <- c(
  "X", "id", "chol", "stab.glu", "hdl", "ratio", "glyhb", "age",
  "height", "weight", "bp.1s", "bp.1d", "bp.2s", "bp.2d", "waist",
  "hip", "time.ppn", "insurance", "fh", "smoking", "TNM_dm", "IMN_hdl",
  "IMN_ratio", "IMN_glyhb", "IMN_height", "IMN_weight", "IMN_bp.1s",
  "IMN_bp.1d", "IMN_bp.2s", "IMN_bp.2d", "IMN_waist", "IMN_hip",
  "IMN_time.ppn"
)

crs$categoric <- c("location", "gender", "frame", "dm", "IMO_dm")

crs$target <- "TNM_IMO_dm"
crs$risk <- NULL
crs$ident <- NULL
crs$ignore <- NULL
crs$weights <- NULL
#======================================================================= # Rattle timestamp: 2020-09-05 23:06:34 x86_64-w64-mingw32
# Fit a classification tree for TNM_IMO_dm on the training rows. The `$`
# accessors and string quotes in this section had been mangled; they are
# restored so that the code parses.

library(rpart, quietly = TRUE)

# Reset the random number seed so the fit is repeatable.
set.seed(crv$seed)

# Train on the training rows only, restricted to the input columns plus the
# target; the formula then uses every remaining column as a predictor.
crs$rpart <- rpart(
  TNM_IMO_dm ~ .,
  data = crs$dataset[crs$train, c(crs$input, crs$target)],
  method = "class",
  parms = list(split = "information"),
  control = rpart.control(usesurrogate = 0, maxsurrogate = 0),
  model = TRUE
)

# Textual view of the fitted tree and its complexity-parameter table.
print(crs$rpart)
printcp(crs$rpart)
# NOTE(review): this prints an empty string and is therefore a no-op; the
# literal may originally have been "\n" before the file was mangled -- confirm.
cat("")
#======================================================================= # Rattle timestamp: 2020-09-05 23:06:51 x86_64-w64-mingw32
# Evaluate the fitted tree on the test rows. The `$` accessors and string
# quotes in this section had been mangled; they are restored so that the
# code parses.

# Predicted class for every test row.
crs$pr <- predict(
  crs$rpart,
  newdata = crs$dataset[crs$test, c(crs$input, crs$target)],
  type = "class"
)

# Error matrix of actual versus predicted classes, with count = TRUE.
rattle::errorMatrix(
  crs$dataset[crs$test, c(crs$input, crs$target)]$TNM_IMO_dm,
  crs$pr,
  count = TRUE
)

# Same matrix without count = TRUE; the outer parentheses print it as well
# as storing it in `per`.
(per <- rattle::errorMatrix(
  crs$dataset[crs$test, c(crs$input, crs$target)]$TNM_IMO_dm,
  crs$pr
))

# 100 minus the sum of the diagonal of `per` (presumably the overall error
# percentage -- confirm against errorMatrix()'s output scale).
cat(100 - sum(diag(per), na.rm = TRUE))

# Mean of the "Error" column of `per`, ignoring missing values.
# NOTE(review): neither cat() call emits a separator or newline, so the two
# numbers are printed back to back.
cat(mean(per[, "Error"], na.rm = TRUE))