# Decision tree on health-insurance individual rates.
#
# Steps: load the rate table, keep the columns of interest, drop rows whose
# IndividualRate falls outside (0, 9999), relabel the rate as a two-level
# factor ("Cheap" / "Expensive"), draw disjoint train/test samples, and fit
# and plot an rpart classification tree.

# NOTE(review): setwd() ties the script to one machine's directory layout.
# It is kept so the relative path below resolves exactly as before; consider
# passing the data directory in instead.
setwd("~/drexel_masters/data/INFO659")

HI <- read.csv("final/HI_rate.csv")
head(HI, 5)

# Columns are selected by position, so this depends on the CSV column order.
HIMP <- HI[, c(3, 9, 12, 14:24)]
head(HIMP, 5)

# Successive range filters on IndividualRate; the intermediate objects are
# kept in the workspace under their original names.
# NOTE(review): the very large values are presumably placeholder rates --
# confirm against the data dictionary.
filteredHI1 <- subset(HIMP, IndividualRate < 999999.0)
newHI <- subset(filteredHI1, IndividualRate > 0)
head(newHI, 5)
finalHI <- subset(newHI, IndividualRate < 9999.0)
head(finalHI, 5)

# Replace the numeric rate with a binary class label: >= 100 is "Expensive".
finalHI$IndividualRate <- factor(
  ifelse(finalHI$IndividualRate >= 100, "Expensive", "Cheap")
)
head(finalHI, 5)

# Draw ONE sample of row indices and split it, so that train and test never
# share a row. Two independent sample() calls would let test rows also appear
# in the training set and inflate the measured accuracy.
set.seed(659)  # fixed seed so the split (and the fitted tree) is reproducible
n_train <- 15000
n_test <- 5000
stopifnot(nrow(finalHI) >= n_train + n_test)
split_rows <- sample(nrow(finalHI), n_train + n_test)
train <- finalHI[split_rows[seq_len(n_train)], ]
test <- finalHI[split_rows[n_train + seq_len(n_test)], ]

library("rpart")
library("rpart.plot")

# Classification tree using information-gain splits; minsplit and cp are
# forwarded to rpart's control settings.
dtRate <- rpart(
  IndividualRate ~ Tobacco + Age + StateCode,
  method = "class",
  data = train,
  parms = list(split = "information"),
  minsplit = 20,
  cp = 0.002
)
rpart.plot(dtRate, type = 5, extra = 1)

# Show the fitted tree's text summary (nodes, splits, counts, class probs).
print(dtRate)
n= 15000
node), split, n, loss, yval, (yprob)
* denotes terminal node
1) root 15000 6596 Expensive (0.4397333333 0.5602666667)
2) Tobacco=No Preference 9901 3310 Cheap (0.6656903343 0.3343096657)
4) StateCode=FL,GA,IN 7013 1181 Cheap (0.8315984600 0.1684015400) *
5) StateCode=AK,AL,AZ 2888 759 Expensive (0.2628116343 0.7371883657) *
3) Tobacco=Tobacco User/Non-Tobacco User 5099 5 Expensive (0.0009805844 0.9990194156) *
# Predict a class label for every row of the held-out test set.
results <- predict(dtRate, test, type = "class")

# View() opens a data viewer, so only call it in an interactive session.
if (interactive()) {
  View(results)
}

# Install caret only when it is missing, so re-running the script does not
# hit the network and reinstall the package every time.
if (!requireNamespace("caret", quietly = TRUE)) {
  install.packages("caret")
}
Installing package into ‘C:/Users/V elena/Documents/R/win-library/3.6’
(as ‘lib’ is unspecified)
also installing the dependencies ‘numDeriv’, ‘SQUAREM’, ‘lava’, ‘plogr’, ‘prodlim’, ‘lifecycle’, ‘iterators’, ‘data.table’, ‘dplyr’, ‘generics’, ‘gower’, ‘ipred’, ‘lubridate’, ‘purrr’, ‘tidyr’, ‘tidyselect’, ‘timeDate’, ‘foreach’, ‘ModelMetrics’, ‘recipes’
trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.6/numDeriv_2016.8-1.1.zip'
Content type 'application/zip' length 116384 bytes (113 KB)
downloaded 113 KB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.6/SQUAREM_2017.10-1.zip'
Content type 'application/zip' length 293374 bytes (286 KB)
downloaded 286 KB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.6/lava_1.6.6.zip'
Content type 'application/zip' length 2190450 bytes (2.1 MB)
downloaded 2.1 MB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.6/plogr_0.2.0.zip'
Content type 'application/zip' length 18867 bytes (18 KB)
downloaded 18 KB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.6/prodlim_2019.11.13.zip'
Content type 'application/zip' length 422643 bytes (412 KB)
downloaded 412 KB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.6/lifecycle_0.1.0.zip'
Content type 'application/zip' length 84784 bytes (82 KB)
downloaded 82 KB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.6/iterators_1.0.12.zip'
Content type 'application/zip' length 343823 bytes (335 KB)
downloaded 335 KB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.6/data.table_1.12.6.zip'
Content type 'application/zip' length 2254730 bytes (2.2 MB)
downloaded 2.2 MB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.6/dplyr_0.8.3.zip'
Content type 'application/zip' length 3264375 bytes (3.1 MB)
downloaded 3.1 MB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.6/generics_0.0.2.zip'
Content type 'application/zip' length 65847 bytes (64 KB)
downloaded 64 KB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.6/gower_0.2.1.zip'
Content type 'application/zip' length 246732 bytes (240 KB)
downloaded 240 KB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.6/ipred_0.9-9.zip'
Content type 'application/zip' length 399826 bytes (390 KB)
downloaded 390 KB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.6/lubridate_1.7.4.zip'
Content type 'application/zip' length 1579751 bytes (1.5 MB)
downloaded 1.5 MB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.6/purrr_0.3.3.zip'
Content type 'application/zip' length 426108 bytes (416 KB)
downloaded 416 KB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.6/tidyr_1.0.0.zip'
Content type 'application/zip' length 1296306 bytes (1.2 MB)
downloaded 1.2 MB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.6/tidyselect_0.2.5.zip'
Content type 'application/zip' length 627245 bytes (612 KB)
downloaded 612 KB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.6/timeDate_3043.102.zip'
Content type 'application/zip' length 1552467 bytes (1.5 MB)
downloaded 1.5 MB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.6/foreach_1.4.7.zip'
Content type 'application/zip' length 419935 bytes (410 KB)
downloaded 410 KB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.6/ModelMetrics_1.2.2.zip'
Content type 'application/zip' length 666089 bytes (650 KB)
downloaded 650 KB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.6/recipes_0.1.7.zip'
Content type 'application/zip' length 1564471 bytes (1.5 MB)
downloaded 1.5 MB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.6/caret_6.0-84.zip'
Content type 'application/zip' length 6237600 bytes (5.9 MB)
downloaded 5.9 MB
package ‘numDeriv’ successfully unpacked and MD5 sums checked
package ‘SQUAREM’ successfully unpacked and MD5 sums checked
package ‘lava’ successfully unpacked and MD5 sums checked
package ‘plogr’ successfully unpacked and MD5 sums checked
package ‘prodlim’ successfully unpacked and MD5 sums checked
package ‘lifecycle’ successfully unpacked and MD5 sums checked
package ‘iterators’ successfully unpacked and MD5 sums checked
package ‘data.table’ successfully unpacked and MD5 sums checked
package ‘dplyr’ successfully unpacked and MD5 sums checked
package ‘generics’ successfully unpacked and MD5 sums checked
package ‘gower’ successfully unpacked and MD5 sums checked
package ‘ipred’ successfully unpacked and MD5 sums checked
package ‘lubridate’ successfully unpacked and MD5 sums checked
package ‘purrr’ successfully unpacked and MD5 sums checked
package ‘tidyr’ successfully unpacked and MD5 sums checked
package ‘tidyselect’ successfully unpacked and MD5 sums checked
package ‘timeDate’ successfully unpacked and MD5 sums checked
package ‘foreach’ successfully unpacked and MD5 sums checked
package ‘ModelMetrics’ successfully unpacked and MD5 sums checked
package ‘recipes’ successfully unpacked and MD5 sums checked
package ‘caret’ successfully unpacked and MD5 sums checked
The downloaded binary packages are in
C:\Users\V elena\AppData\Local\Temp\RtmpE3f06O\downloaded_packages
# Attach the packages used for model evaluation.
library(ggplot2)
library(lattice)
library(caret)

# Compare predicted labels against the true test labels and print every
# available statistic (accuracy, kappa, precision/recall, F1, ...).
print(
  confusionMatrix(
    data = results,
    reference = test$IndividualRate,
    mode = "everything"
  )
)
Confusion Matrix and Statistics
Reference
Prediction Cheap Expensive
Cheap 1942 381
Expensive 268 2409
Accuracy : 0.8702
95% CI : (0.8606, 0.8794)
No Information Rate : 0.558
P-Value [Acc > NIR] : < 2.2e-16
Kappa : 0.7383
Mcnemar's Test P-Value : 1.101e-05
Sensitivity : 0.8787
Specificity : 0.8634
Pos Pred Value : 0.8360
Neg Pred Value : 0.8999
Precision : 0.8360
Recall : 0.8787
F1 : 0.8568
Prevalence : 0.4420
Detection Rate : 0.3884
Detection Prevalence : 0.4646
Balanced Accuracy : 0.8711
'Positive' Class : Cheap
LS0tDQp0aXRsZTogIkRlY2lzaW9uIFRyZWUiDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KYGBge3J9DQpzZXR3ZCgifi9kcmV4ZWxfbWFzdGVycy9kYXRhL0lORk82NTkiKQ0KYGBgDQoNCmBgYHtyfQ0KSEkgPC0gcmVhZC5jc3YoImZpbmFsL0hJX3JhdGUuY3N2IikNCmhlYWQoSEksNSkNCmBgYA0KDQpgYGB7cn0NCkhJTVAgPC0gSElbLGMoMyw5LDEyLDE0LDE1LDE2LDE3LDE4LDE5LDIwLDIxLDIyLDIzLDI0KV0NCmBgYA0KDQpgYGB7cn0NCmhlYWQoSElNUCw1KQ0KYGBgDQoNCmBgYHtyfQ0KZmlsdGVyZWRISTEgPC0gc3Vic2V0KEhJTVAsIEluZGl2aWR1YWxSYXRlIDw5OTk5OTkuMCkNCm5ld0hJIDwtIHN1YnNldChmaWx0ZXJlZEhJMSwgSW5kaXZpZHVhbFJhdGUgPjApDQpoZWFkKG5ld0hJLDUpDQpgYGANCmBgYHtyfQ0KZmluYWxISSA8LSBzdWJzZXQobmV3SEksIEluZGl2aWR1YWxSYXRlIDw5OTk5LjApDQpoZWFkKGZpbmFsSEksNSkNCmBgYA0KDQoNCmBgYHtyfQ0KZmluYWxISSRJbmRpdmlkdWFsUmF0ZSA8LSBmYWN0b3IoaWZlbHNlKGZpbmFsSEkkSW5kaXZpZHVhbFJhdGU+PTEwMCwgIkV4cGVuc2l2ZSIsICJDaGVhcCIpKQ0KYGBgDQpgYGB7cn0NCmhlYWQoZmluYWxISSw1KQ0KYGBgDQoNCmBgYHtyfQ0KdHJhaW4gPC0gZmluYWxISVtzYW1wbGUobnJvdyhmaW5hbEhJKSwxNTAwMCksXQ0KdGVzdCA8LSBmaW5hbEhJW3NhbXBsZShucm93KGZpbmFsSEkpLDUwMDApLF0NCg0KYGBgDQoNCmBgYHtyfQ0KbGlicmFyeSgicnBhcnQiKQ0KbGlicmFyeSgicnBhcnQucGxvdCIpDQpgYGANCg0KYGBge3J9DQpkdFJhdGUgPC0gcnBhcnQoSW5kaXZpZHVhbFJhdGUgfiBUb2JhY2NvICsgQWdlICsgU3RhdGVDb2RlLCBtZXRob2QgPSAiY2xhc3MiLCBkYXRhID0gdHJhaW4sIHBhcm1zID0gbGlzdChzcGxpdCA9ICdpbmZvcm1hdGlvbicpLCBtaW5zcGxpdCA9IDIwLCBjcCA9IDAuMDAyKQ0KcnBhcnQucGxvdChkdFJhdGUsIHR5cGUgPSA1LCBleHRyYSA9IDEpDQpgYGANCg0KYGBge3J9DQpkdFJhdGUNCmBgYA0KDQpgYGB7cn0NCnJlc3VsdHMgPC0gcHJlZGljdChkdFJhdGUsdGVzdCwgdHlwZSA9ICJjbGFzcyIpDQpWaWV3KHJlc3VsdHMpDQpgYGANCg0KYGBge3J9DQppbnN0YWxsLnBhY2thZ2VzKCJjYXJldCIpDQpgYGANCg0KYGBge3J9DQpsaWJyYXJ5KGdncGxvdDIpDQpsaWJyYXJ5KGxhdHRpY2UpDQpsaWJyYXJ5KGNhcmV0KQ0KDQpgYGANCg0KYGBge3J9DQpjb25mdXNpb25NYXRyaXgocmVzdWx0cywgdGVzdCRJbmRpdmlkdWFsUmF0ZSwgbW9kZSA9ICJldmVyeXRoaW5nIikNCmBgYA0KDQo=