Data yang digunakan : Data HCC Survival

Memanggil Package yang dibutuhkan :

library(readxl)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.4     v dplyr   1.0.7
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   2.0.1     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(dplyr)
library(imputeMissings)
## 
## Attaching package: 'imputeMissings'
## The following object is masked from 'package:dplyr':
## 
##     compute
library(ggplot2)
library(ggcorrplot)
  1. Input Dataset Atribut Kategorik
# Read the "Data Kategorik" sheet of the HCC survival workbook and preview
# the column names and types.
HCCKategorik <- read_excel(
  "D:\\TINGKAT IV\\5 - DATA MINING\\TUGAS\\Data HCC Survival 4SE1.xlsx",
  sheet = "Data Kategorik"
)
glimpse(HCCKategorik)
## Rows: 165
## Columns: 28
## $ Patients                       <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, ~
## $ Gender                         <dbl> 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ~
## $ Symptoms                       <chr> "0", "?", "0", "1", "1", "0", "0", "1",~
## $ Alcohol                        <dbl> 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, ~
## $ `Hepatitis B Surface Antigen`  <chr> "0", "0", "1", "0", "1", "0", "0", "0",~
## $ `Hepatitis B e Antigen`        <chr> "0", "0", "0", "0", "0", "?", "?", "?",~
## $ `Hepatitis B Core Antibody`    <chr> "0", "0", "1", "0", "1", "0", "1", "0",~
## $ `Hepatitis C Virus Antibody`   <chr> "0", "1", "0", "0", "0", "0", "1", "0",~
## $ Cirrhosis                      <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ~
## $ `Endemic Countries`            <chr> "0", "?", "0", "0", "0", "0", "0", "0",~
## $ Smoking                        <chr> "1", "?", "1", "1", "1", "?", "0", "1",~
## $ Diabetes                       <chr> "1", "1", "0", "1", "0", "0", "1", "1",~
## $ Obesity                        <chr> "?", "0", "0", "0", "0", "1", "0", "?",~
## $ Hemochromatosis                <chr> "1", "0", "0", "0", "0", "0", "?", "0",~
## $ `Arterial Hypertension`        <chr> "0", "1", "1", "1", "1", "0", "0", "0",~
## $ `Chronic Renal Insufficiency`  <chr> "0", "0", "1", "0", "1", "0", "0", "0",~
## $ `Human Immunodeficiency Virus` <chr> "0", "0", "0", "0", "0", "0", "0", "0",~
## $ `Nonalcoholic Steatohepatitis` <chr> "0", "0", "0", "0", "0", "0", "0", "0",~
## $ `Esophageal Varices`           <chr> "1", "1", "0", "0", "0", "1", "0", "0",~
## $ Splenomegaly                   <chr> "0", "0", "0", "0", "0", "1", "0", "1",~
## $ `Portal Hypertension`          <chr> "0", "0", "1", "0", "0", "1", "0", "1",~
## $ `Portal Vein Thrombosis`       <chr> "0", "0", "0", "0", "0", "0", "0", "1",~
## $ `Liver Metastasis: nominal`    <chr> "0", "0", "1", "1", "0", "0", "0", "0",~
## $ `Radiological Hallmark`        <chr> "1", "1", "1", "1", "1", "1", "1", "1",~
## $ `Performance Status`           <dbl> 0, 0, 2, 0, 0, 1, 0, 3, 1, 0, 0, 0, 0, ~
## $ `Encefalopathy degree`         <chr> "1", "1", "1", "1", "1", "1", "1", "1",~
## $ `Ascites degree`               <chr> "1", "1", "2", "1", "1", "2", "1", "1",~
## $ Class                          <dbl> 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, ~
  1. Menyesuaikan Tipe Variabel/Atribut
# Treat every attribute as categorical: both the numeric and the character
# columns are converted to factors in a single pass.
HCCKategorik <- HCCKategorik %>%
  mutate(across(where(is.numeric) | where(is.character), as.factor))
glimpse(HCCKategorik)
## Rows: 165
## Columns: 28
## $ Patients                       <fct> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, ~
## $ Gender                         <fct> 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ~
## $ Symptoms                       <fct> 0, ?, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, ~
## $ Alcohol                        <fct> 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, ~
## $ `Hepatitis B Surface Antigen`  <fct> 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ~
## $ `Hepatitis B e Antigen`        <fct> 0, 0, 0, 0, 0, ?, ?, ?, 0, 0, 0, 0, 0, ~
## $ `Hepatitis B Core Antibody`    <fct> 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, ~
## $ `Hepatitis C Virus Antibody`   <fct> 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, ~
## $ Cirrhosis                      <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ~
## $ `Endemic Countries`            <fct> 0, ?, 0, 0, 0, 0, 0, 0, 0, 0, ?, 1, 0, ~
## $ Smoking                        <fct> 1, ?, 1, 1, 1, ?, 0, 1, 1, 0, ?, 0, 1, ~
## $ Diabetes                       <fct> 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, ~
## $ Obesity                        <fct> ?, 0, 0, 0, 0, 1, 0, ?, 0, 0, 0, 0, 0, ~
## $ Hemochromatosis                <fct> 1, 0, 0, 0, 0, 0, ?, 0, 0, 1, 0, 0, 0, ~
## $ `Arterial Hypertension`        <fct> 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, ~
## $ `Chronic Renal Insufficiency`  <fct> 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ~
## $ `Human Immunodeficiency Virus` <fct> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ~
## $ `Nonalcoholic Steatohepatitis` <fct> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ?, ~
## $ `Esophageal Varices`           <fct> 1, 1, 0, 0, 0, 1, 0, 0, ?, 0, ?, ?, ?, ~
## $ Splenomegaly                   <fct> 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, ~
## $ `Portal Hypertension`          <fct> 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, ~
## $ `Portal Vein Thrombosis`       <fct> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, ~
## $ `Liver Metastasis: nominal`    <fct> 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, ~
## $ `Radiological Hallmark`        <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, ~
## $ `Performance Status`           <fct> 0, 0, 2, 0, 0, 1, 0, 3, 1, 0, 0, 0, 0, ~
## $ `Encefalopathy degree`         <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ~
## $ `Ascites degree`               <fct> 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 2, 1, 1, ~
## $ Class                          <fct> 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, ~
  1. Memperbaiki tanda ? atau missing value pada HCCKategorik
# Recode the "?" placeholder as a real NA.
# Each factor is rebuilt without the "?" level: entries that held "?" become
# NA and the stale level disappears. This replaces na_if() applied to the
# whole data frame, which dplyr >= 1.1.0 no longer accepts and which left
# "?" behind as an unused factor level.
HCCKategorik <- HCCKategorik %>%
  mutate(across(
    where(is.factor),
    ~ factor(.x, levels = setdiff(levels(.x), "?"))
  ))
glimpse(HCCKategorik)
## Rows: 165
## Columns: 28
## $ Patients                       <fct> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, ~
## $ Gender                         <fct> 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ~
## $ Symptoms                       <fct> 0, NA, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0,~
## $ Alcohol                        <fct> 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, ~
## $ `Hepatitis B Surface Antigen`  <fct> 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ~
## $ `Hepatitis B e Antigen`        <fct> 0, 0, 0, 0, 0, NA, NA, NA, 0, 0, 0, 0, ~
## $ `Hepatitis B Core Antibody`    <fct> 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, ~
## $ `Hepatitis C Virus Antibody`   <fct> 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, ~
## $ Cirrhosis                      <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ~
## $ `Endemic Countries`            <fct> 0, NA, 0, 0, 0, 0, 0, 0, 0, 0, NA, 1, 0~
## $ Smoking                        <fct> 1, NA, 1, 1, 1, NA, 0, 1, 1, 0, NA, 0, ~
## $ Diabetes                       <fct> 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, ~
## $ Obesity                        <fct> NA, 0, 0, 0, 0, 1, 0, NA, 0, 0, 0, 0, 0~
## $ Hemochromatosis                <fct> 1, 0, 0, 0, 0, 0, NA, 0, 0, 1, 0, 0, 0,~
## $ `Arterial Hypertension`        <fct> 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, ~
## $ `Chronic Renal Insufficiency`  <fct> 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ~
## $ `Human Immunodeficiency Virus` <fct> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ~
## $ `Nonalcoholic Steatohepatitis` <fct> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, NA,~
## $ `Esophageal Varices`           <fct> 1, 1, 0, 0, 0, 1, 0, 0, NA, 0, NA, NA, ~
## $ Splenomegaly                   <fct> 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, ~
## $ `Portal Hypertension`          <fct> 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, ~
## $ `Portal Vein Thrombosis`       <fct> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, ~
## $ `Liver Metastasis: nominal`    <fct> 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, ~
## $ `Radiological Hallmark`        <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, ~
## $ `Performance Status`           <fct> 0, 0, 2, 0, 0, 1, 0, 3, 1, 0, 0, 0, 0, ~
## $ `Encefalopathy degree`         <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ~
## $ `Ascites degree`               <fct> 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 2, 1, 1, ~
## $ Class                          <fct> 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, ~
  1. Memeriksa Missing Value pada Dataset (Semua Atribut)
library(dplyr)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
# Stand-alone example: build a one-row data frame and turn its text columns
# into SQL-style quoted literals.
# NOTE(review): nothing in the visible part of this document uses `import` or
# `import_sql` afterwards -- confirm whether this snippet is still needed.
import <- data.frame(
  Test_Name = "Fir'st Last",
  Test_Date = "2019-01-01",
  Test_Number = 10
)

import_sql <- import %>%
  # Drop columns that contain nothing but NA.
  select(where(~ !all(is.na(.x)))) %>%
  mutate(across(where(is.factor), as.character)) %>%
  # Trim whitespace, double any embedded single quote, then wrap in quotes.
  mutate(across(
    where(is.character),
    ~ paste0("'", gsub("'", "''", trimws(.x)), "'")
  )) %>%
  # Date columns are quoted as well.
  mutate(across(where(is.Date), ~ paste0("'", .x, "'")))
# Share of missing values per attribute, drawn as a horizontal bar chart
# ordered by that share.
# across() replaces the deprecated funs() helper and pivot_longer() replaces
# the superseded gather(); the resulting table is the same (one row per
# attribute with columns `variables` and `percent_missing`).
options(repr.plot.width = 6, repr.plot.height = 4)
missing_data <- HCCKategorik %>%
  summarise(across(everything(), ~ sum(is.na(.x)) / n())) %>%
  pivot_longer(
    cols = everything(),
    names_to = "variables",
    values_to = "percent_missing"
  )
ggplot(
  missing_data,
  aes(x = reorder(variables, percent_missing), y = percent_missing)
) +
  # geom_col() is geom_bar(stat = "identity"); the white outline is a fixed
  # setting, so it is passed as a constant instead of through aes().
  # `size` is kept for the outline width because the ggplot2 version loaded
  # by this document (3.3.5) predates the `linewidth` argument.
  geom_col(fill = "red", colour = "white", size = 0.3) +
  xlab("variables") +
  coord_flip() +
  theme_bw()

Dapat dilihat bahwa batang berwarna merah menunjukkan persentase missing value pada tiap atribut; persentase terbesar terdapat pada atribut Esophageal Varices, dan terdapat 5 variabel yang sudah lengkap.

  1. Imputasi Nilai Missing Value pada HCCKategorik cara 1 (median/modus)
# Derive the replacement values with the "median/mode" method, then fill the
# NA entries of HCCKategorik with them.
# The calls are namespace-qualified because imputeMissings::compute() masks
# dplyr::compute() and would otherwise depend on the attach order.
imput <- imputeMissings::compute(HCCKategorik, method = "median/mode")
HCCKategorik_nonmissing <- imputeMissings::impute(
  HCCKategorik,
  object = imput
)
glimpse(HCCKategorik_nonmissing)
## Rows: 165
## Columns: 28
## $ Patients                     <fct> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13~
## $ Gender                       <fct> 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,~
## $ Symptoms                     <fct> 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1,~
## $ Alcohol                      <fct> 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0,~
## $ Hepatitis.B.Surface.Antigen  <fct> 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ Hepatitis.B.e.Antigen        <fct> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ Hepatitis.B.Core.Antibody    <fct> 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0,~
## $ Hepatitis.C.Virus.Antibody   <fct> 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1,~
## $ Cirrhosis                    <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,~
## $ Endemic.Countries            <fct> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,~
## $ Smoking                      <fct> 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1,~
## $ Diabetes                     <fct> 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0,~
## $ Obesity                      <fct> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ Hemochromatosis              <fct> 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,~
## $ Arterial.Hypertension        <fct> 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0,~
## $ Chronic.Renal.Insufficiency  <fct> 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ Human.Immunodeficiency.Virus <fct> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ Nonalcoholic.Steatohepatitis <fct> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ Esophageal.Varices           <fct> 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0,~
## $ Splenomegaly                 <fct> 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0,~
## $ Portal.Hypertension          <fct> 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0,~
## $ Portal.Vein.Thrombosis       <fct> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0,~
## $ Liver.Metastasis..nominal    <fct> 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ Radiological.Hallmark        <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1,~
## $ Performance.Status           <fct> 0, 0, 2, 0, 0, 1, 0, 3, 1, 0, 0, 0, 0, 0,~
## $ Encefalopathy.degree         <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,~
## $ Ascites.degree               <fct> 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 2, 1, 1, 1,~
## $ Class                        <fct> 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1,~

Sekilas, pada beberapa observasi pertama yang ditampilkan sudah tidak terlihat missing value; selanjutnya dilakukan pemeriksaan kembali untuk keseluruhan observasi.

  1. Memeriksa Hasil Imputasi data missing value (memastikan tidak ada missing value)
# Re-check the share of missing values per attribute after imputation; every
# bar should now have zero length.
# across() replaces the deprecated funs() helper and pivot_longer() replaces
# the superseded gather(); the resulting table is the same (one row per
# attribute with columns `variables` and `percent_missing`).
options(repr.plot.width = 6, repr.plot.height = 4)
missing_data <- HCCKategorik_nonmissing %>%
  summarise(across(everything(), ~ sum(is.na(.x)) / n())) %>%
  pivot_longer(
    cols = everything(),
    names_to = "variables",
    values_to = "percent_missing"
  )
ggplot(
  missing_data,
  aes(x = reorder(variables, percent_missing), y = percent_missing)
) +
  # geom_col() is geom_bar(stat = "identity"); the white outline is a fixed
  # setting, so it is passed as a constant instead of through aes().
  # `size` is kept for the outline width because the ggplot2 version loaded
  # by this document (3.3.5) predates the `linewidth` argument.
  geom_col(fill = "red", colour = "white", size = 0.3) +
  xlab("variables") +
  coord_flip() +
  theme_bw()

Berdasarkan hasil output dapat diperoleh bahwa hasil pemeriksaan sudah menunjukkan tidak terdapat missing value pada seluruh atribut

  1. Feature Engineering - Penyeleksian Variabel: dari sekian banyak atribut kategorik, jika seluruhnya dipaksakan masuk ke model klasifikasi, hasilnya mungkin kurang baik, sehingga dilakukan pemeriksaan terlebih dahulu
# For every predictor, draw a stacked bar chart of the Class proportions
# within each level of that predictor.
# The predictors are all columns except the patient identifier and the
# target, taken in column order (Gender ... Ascites.degree), which is the
# same set and order the plots were previously written out by hand.
predictor_names <- setdiff(
  names(HCCKategorik_nonmissing),
  c("Patients", "Class")
)

for (predictor in predictor_names) {
  class_share_plot <- ggplot(
    HCCKategorik_nonmissing,
    # .data[[...]] looks the column up by its name held in `predictor`.
    aes(x = .data[[predictor]], fill = Class)
  ) +
    geom_bar(position = "fill") +
    # The axis label is the column name itself, so no label can drift from
    # its column (one hand-written label used to carry a trailing space).
    xlab(predictor)

  # Inside a loop a ggplot object is not auto-printed; print it explicitly.
  print(class_share_plot)
}

Kemudian dapat dilakukan pemilihan atribut berdasarkan pengamatan signifikansi secara visual.