# Load the hepatitis data set from a local CSV export.
# NOTE(review): the path is absolute and specific to one Windows machine;
# it must be adjusted before the script can run anywhere else.
hepatitis <- read.csv(file = "C:/Users/dailo/Downloads/hepatitis.csv")
# Preview the first six rows (the default for head()) to check the import.
head(x = hepatitis, n = 6L)
## Gender YearBirth YearTrans RouteTrans IV Tx Coc Tatt HBV HIV EtOH Obes Rx Tox
## 1 0 1905 1970 Coc 0 0 1 0 0 0 0 0 0 0
## 2 1 1958 1992 Coc 0 0 1 0 0 0 0 0 0 0
## 3 1 1957 1977 Coc 0 0 1 0 0 0 0 0 0 0
## 4 0 1945 1968 Coc 0 0 1 1 0 0 1 0 0 0
## 5 1 1956 1979 Coc 0 0 1 0 0 0 0 0 0 0
## 6 0 1939 1973 IV 1 0 0 0 0 0 1 0 0 0
## CLD LFT YWOD Age Bx
## 1 0 1 65 97 I
## 2 0 0 34 44 I
## 3 0 0 20 45 II
## 4 0 0 23 57 II
## 5 0 1 23 46 I
## 6 0 1 34 63 II
# Derive YWD = Age - YWOD and append it as a new column.
# Columns are addressed by name instead of by position (18, 17, 20) so the
# calculation stays correct if the CSV gains, loses, or reorders columns.
# NOTE(review): YWD presumably means "years with disease" and YWOD "years
# without disease" -- confirm against the data dictionary.
hepatitis[["YWD"]] <- hepatitis[["Age"]] - hepatitis[["YWOD"]]
# Show the first three rows to confirm the new column.
head(hepatitis, n = 3L)
## Gender YearBirth YearTrans RouteTrans IV Tx Coc Tatt HBV HIV EtOH Obes Rx Tox
## 1 0 1905 1970 Coc 0 0 1 0 0 0 0 0 0 0
## 2 1 1958 1992 Coc 0 0 1 0 0 0 0 0 0 0
## 3 1 1957 1977 Coc 0 0 1 0 0 0 0 0 0 0
## CLD LFT YWOD Age Bx YWD
## 1 0 1 65 97 I 32
## 2 0 0 34 44 I 10
## 3 0 0 20 45 II 25
# Build the modelling data frame hh by removing columns that are not used as
# predictors. Columns are dropped by name rather than by negative position
# (2:4 and 17) so the selection survives changes to the CSV layout.
# NOTE(review): YWOD looks redundant because YWD is computed from Age and
# YWOD, and RouteTrans appears to duplicate the IV/Tx/Coc/... indicator
# columns -- confirm that this is the reason they are excluded.
hh <- hepatitis[
  ,
  setdiff(
    names(hepatitis),
    c("YearBirth", "YearTrans", "RouteTrans", "YWOD")
  ),
  drop = FALSE
]
# Preview the first three rows of the reduced data frame.
hh[1:3, ]
## Gender IV Tx Coc Tatt HBV HIV EtOH Obes Rx Tox CLD LFT Age Bx YWD
## 1 0 0 0 1 0 0 0 0 0 0 0 0 1 97 I 32
## 2 1 0 0 1 0 0 0 0 0 0 0 0 0 44 I 10
## 3 1 0 0 1 0 0 0 0 0 0 0 0 0 45 II 25
# Convert the indicator columns to factors so they are handled as categorical
# variables rather than as numbers. These are the first 13 columns of hh in
# the preview above; selecting them by name keeps Age and YWD numeric even if
# the column order changes upstream. Bx is left as read from the CSV.
indicator_cols <- c(
  "Gender", "IV", "Tx", "Coc", "Tatt", "HBV", "HIV",
  "EtOH", "Obes", "Rx", "Tox", "CLD", "LFT"
)
hh[indicator_cols] <- lapply(hh[indicator_cols], factor)
# The printed rows look the same as before the conversion because the factor
# labels are the original values.
hh[1:3, ]
## Gender IV Tx Coc Tatt HBV HIV EtOH Obes Rx Tox CLD LFT Age Bx YWD
## 1 0 0 0 1 0 0 0 0 0 0 0 0 1 97 I 32
## 2 1 0 0 1 0 0 0 0 0 0 0 0 0 44 I 10
## 3 1 0 0 1 0 0 0 0 0 0 0 0 0 45 II 25
# Spot-check the factor conversion on columns 6, 8 and 13 of hh (shown as
# HBV, EtOH and LFT in the preview above): each should report its levels.
levels(hh[[6L]])
## [1] "0" "1"
levels(hh[[8L]])
## [1] "0" "1"
levels(hh[[13L]])
## [1] "0" "1"
library(rpart)
# Build decision tree
# Bx is the response; every other column of hh is offered as a predictor.
# method = "class" requests a classification tree explicitly instead of
# relying on rpart inferring the method from the type of the response.
heptree <- rpart(Bx ~ ., data = hh, method = "class")
# Display decision tree
# margin adds white space around the tree so long labels are not cut off at
# the plot border; pretty = 0 prints factor levels in full instead of the
# default single-letter abbreviations (a, b, ...).
plot(heptree, margin = 0.1)
text(heptree, pretty = 0)
