Description

The birthwt data frame has 189 rows and 10 columns. The data were collected at Baystate Medical Center, Springfield, Mass during 1986. Usage

birthwt

Format

This data frame contains the following columns:

low indicator of birth weight less than 2.5 kg.

age mother’s age in years.

lwt mother’s weight in pounds at last menstrual period.

race mother’s race (1 = white, 2 = black, 3 = other).

smoke smoking status during pregnancy.

ptl number of previous premature labours.

ht history of hypertension.

ui presence of uterine irritability.

ftv number of physician visits during the first trimester.

bwt birth weight in grams.

library(MASS)  #birthwt {MASS} 
library(rpart) #to fit decision tree model
data("birthwt")
head(birthwt)

check the number of unique values

apply(birthwt,2, function(x) round(length(unique(x))/nrow(birthwt),3)*100)
  low   age   lwt  race smoke   ptl    ht    ui   ftv   bwt 
  1.1  12.7  39.7   1.6   1.1   2.1   1.1   1.1   3.2  69.3 
col <- c(1,4:9)
 for (i in col) {
   birthwt[,i] = as.factor(birthwt[,i])
 }
str(birthwt)
'data.frame':   189 obs. of  10 variables:
 $ low  : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ age  : int  19 33 20 21 18 21 22 17 29 26 ...
 $ lwt  : int  182 155 105 108 107 124 118 103 123 113 ...
 $ race : Factor w/ 3 levels "1","2","3": 2 3 1 1 1 3 1 3 1 1 ...
 $ smoke: Factor w/ 2 levels "0","1": 1 1 2 2 2 1 1 1 2 2 ...
 $ ptl  : Factor w/ 4 levels "0","1","2","3": 1 1 1 1 1 1 1 1 1 1 ...
 $ ht   : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ ui   : Factor w/ 2 levels "0","1": 2 1 1 2 2 1 1 1 1 1 ...
 $ ftv  : Factor w/ 6 levels "0","1","2","3",..: 1 4 2 3 1 1 2 2 2 1 ...
 $ bwt  : int  2523 2551 2557 2594 2600 2622 2637 2637 2663 2665 ...
any(is.na(birthwt))
[1] FALSE

target variable is low

summary(birthwt)
 low          age             lwt        race   smoke   ptl     ht      ui      ftv    
 0:130   Min.   :14.00   Min.   : 80.0   1:96   0:115   0:159   0:177   0:161   0:100  
 1: 59   1st Qu.:19.00   1st Qu.:110.0   2:26   1: 74   1: 24   1: 12   1: 28   1: 47  
         Median :23.00   Median :121.0   3:67           2:  5                   2: 30  
         Mean   :23.24   Mean   :129.8                  3:  1                   3:  7  
         3rd Qu.:26.00   3rd Qu.:140.0                                          4:  4  
         Max.   :45.00   Max.   :250.0                                          6:  1  
      bwt      
 Min.   : 709  
 1st Qu.:2414  
 Median :2977  
 Mean   :2945  
 3rd Qu.:3487  
 Max.   :4990  

lets split data into traning and test data set.

set.seed(1234)
library(caTools)
index <- sample.split(Y = birthwt$low, SplitRatio = 0.80)
train <- birthwt[index,]
test<- birthwt[!index,]

fitting the model

tree <- rpart(low~.-bwt,  data = train, method = 'class')
plot(tree)
text(tree, pretty = 1)

rpart.plot(tree)

pred <- predict(tree, test, type = "class")
pred
 89  99 101 112 114 115 123 126 127 137 145 147 148 163 166 174 180 184 186 209 212 213 216 218 219 
  0   1   0   0   0   0   0   0   0   1   0   0   0   0   0   0   0   0   0   0   0   0   1   0   0 
220   4  15  17  20  23  30  35  37  42  56  79  82 
  0   1   1   1   0   0   1   1   1   1   1   0   1 
Levels: 0 1
tab <- table(pred, actual= test$low)
tab
    actual
pred  0  1
   0 23  3
   1  3  9

Accuracy matric

sum(diag(tab))/sum(tab)
[1] 0.8421053

misclassification error

1-sum(diag(tab))/sum(tab)
[1] 0.1578947

plotting ROC and claculating Auc matric

library(pROC)
prd <- predict(tree,test, type = "prob")
head(prd,10)
            0         1
89  0.8142857 0.1857143
99  0.3888889 0.6111111
101 0.8142857 0.1857143
112 0.8142857 0.1857143
114 0.8142857 0.1857143
115 0.6800000 0.3200000
123 0.8142857 0.1857143
126 0.8142857 0.1857143
127 0.8142857 0.1857143
137 0.4615385 0.5384615
plot(roc(test$low, prd[,2],percent = T))
Setting levels: control = 0, case = 1
Setting direction: controls < cases

print(auc)
Area under the curve: 0.7788
LS0tDQp0aXRsZTogIlJpc2sgRmFjdG9ycyBBc3NvY2lhdGVkIHdpdGggTG93IEluZmFudCBCaXJ0aCBXZWlnaHQiDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQoNCkRlc2NyaXB0aW9uDQoNClRoZSBiaXJ0aHd0IGRhdGEgZnJhbWUgaGFzIDE4OSByb3dzIGFuZCAxMCBjb2x1bW5zLiBUaGUgZGF0YSB3ZXJlIGNvbGxlY3RlZCBhdCBCYXlzdGF0ZSBNZWRpY2FsIENlbnRlciwgU3ByaW5nZmllbGQsIE1hc3MgZHVyaW5nIDE5ODYuDQpVc2FnZQ0KDQpiaXJ0aHd0DQoNCkZvcm1hdA0KDQpUaGlzIGRhdGEgZnJhbWUgY29udGFpbnMgdGhlIGZvbGxvd2luZyBjb2x1bW5zOg0KDQpsb3cNCmluZGljYXRvciBvZiBiaXJ0aCB3ZWlnaHQgbGVzcyB0aGFuIDIuNSBrZy4NCg0KYWdlDQptb3RoZXIncyBhZ2UgaW4geWVhcnMuDQoNCmx3dA0KbW90aGVyJ3Mgd2VpZ2h0IGluIHBvdW5kcyBhdCBsYXN0IG1lbnN0cnVhbCBwZXJpb2QuDQoNCnJhY2UNCm1vdGhlcidzIHJhY2UgKDEgPSB3aGl0ZSwgMiA9IGJsYWNrLCAzID0gb3RoZXIpLg0KDQpzbW9rZQ0Kc21va2luZyBzdGF0dXMgZHVyaW5nIHByZWduYW5jeS4NCg0KcHRsDQpudW1iZXIgb2YgcHJldmlvdXMgcHJlbWF0dXJlIGxhYm91cnMuDQoNCmh0DQpoaXN0b3J5IG9mIGh5cGVydGVuc2lvbi4NCg0KdWkNCnByZXNlbmNlIG9mIHV0ZXJpbmUgaXJyaXRhYmlsaXR5Lg0KDQpmdHYNCm51bWJlciBvZiBwaHlzaWNpYW4gdmlzaXRzIGR1cmluZyB0aGUgZmlyc3QgdHJpbWVzdGVyLg0KDQpid3QNCmJpcnRoIHdlaWdodCBpbiBncmFtcy4NCg0KYGBge3J9DQpsaWJyYXJ5KE1BU1MpICAjYmlydGh3dCB7TUFTU30gDQpsaWJyYXJ5KHJwYXJ0KSAjdG8gZml0IGRlY2lzaW9uIHRyZWUgbW9kZWwNCmBgYA0KDQpgYGB7cn0NCmRhdGEoImJpcnRod3QiKQ0KYGBgDQoNCmBgYHtyfQ0KaGVhZChiaXJ0aHd0KQ0KYGBgDQpjaGVjayB0aGUgbnVtYmVyIG9mIHVuaXF1ZSB2YWx1ZXMNCmBgYHtyfQ0KYXBwbHkoYmlydGh3dCwyLCBmdW5jdGlvbih4KSByb3VuZChsZW5ndGgodW5pcXVlKHgpKS9ucm93KGJpcnRod3QpLDMpKjEwMCkNCmBgYA0KDQpgYGB7cn0NCmNvbCA8LSBjKDEsNDo5KQ0KIGZvciAoaSBpbiBjb2wpIHsNCiAgIGJpcnRod3RbLGldID0gYXMuZmFjdG9yKGJpcnRod3RbLGldKQ0KIH0NCnN0cihiaXJ0aHd0KQ0KYGBgDQoNCmBgYHtyfQ0KYW55KGlzLm5hKGJpcnRod3QpKQ0KYGBgDQp0YXJnZXQgdmFyaWFibGUgaXMgbG93DQpgYGB7cn0NCnN1bW1hcnkoYmlydGh3dCkNCmBgYA0KbGV0cyBzcGxpdCBkYXRhIGludG8gdHJhbmluZyBhbmQgdGVzdCBkYXRhIHNldC4gDQpgYGB7cn0NCnNldC5zZWVkKDEyMzQpDQpsaWJyYXJ5KGNhVG9vbHMpDQppbmRleCA8LSBzYW1wbGUuc3BsaXQoWSA9IGJpcnRod3QkbG93LCBTcGxpdFJhdGlvID0gMC44MCkNCnRyYWluIDwtIGJpcnRod3RbaW5kZXgsXQ0KdGVzdDwtIGJpcnRod3RbIWluZGV4LF0NCmBgYA0KZml0dGluZyB0aGUgbW9kZWwNCmBgYHtyfQ0KdHJlZSA8LSBycGFydChsb3d+Li1id3QsICBkYXRhID0gdHJhaW4sIG1ldGhvZCA9ICdjbGFzcycpDQpwbG90KHRyZWUpDQp0ZXh0KHRyZWUsIHByZXR0eSA9IDEpDQpgYGANCg0KYGBge3J9DQpycGFydC5wbG90KHRyZWUpDQpgYGANCg0KYGBge3J9DQpwcmVkIDwtIHByZWRpY3QodHJlZSwgdGVzdCwgdHlwZSA9ICJjbGFzcyIpDQpwcmVkDQpgYGANCg0KYGBge3J9DQp0YWIgPC0gdGFibGUocHJlZCwgYWN0dWFsPSB0ZXN0JGxvdykNCnRhYg0KYGBgDQpBY2N1cmFjeSBtYXRyaWMNCmBgYHtyfQ0Kc3VtKGRpYWcodGFiKSkvc3VtKHRhYikNCmBgYA0KDQptaXNjbGFzc2lmaWNhdGlvbiBlcnJvciANCmBgYHtyfQ0KMS1zdW0oZGlhZyh0YWIpKS9zdW0odGFiKQ0KYGBgDQpwbG90dGluZyBST0MgYW5kIGNsYWN1bGF0aW5nIEF1YyBtYXRyaWMgDQpgYGB7cn0NCmxpYnJhcnkocFJPQykNCnByZCA8LSBwcmVkaWN0KHRyZWUsdGVzdCwgdHlwZSA9ICJwcm9iIikNCmhlYWQocHJkLDEwKQ0KYGBgDQoNCmBgYHtyfQ0KYXVjIDwtIGF1Yyh0ZXN0JGxvdywgcHJkWywyXSkNCnBsb3Qocm9jKHRlc3QkbG93LCBwcmRbLDJdLHBlcmNlbnQgPSBUKSkNCmBgYA0KDQpgYGB7cn0NCnByaW50KGF1YykNCmBgYA0KDQo=