This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.

# Template example chunk: draw the default plot of the `cars` object.
plot(cars)

Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.

When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).

# Attach the packages this script relies on. caret must be attached here:
# dummyVars() below (and the other caret helpers used later in the script)
# are otherwise not found, because nothing else in the script loads it.
library(earth)
library(caret)
data(etitanic)
# Design matrix as produced by base R's model.matrix(), shown for comparison.
head(model.matrix(survived ~ ., data = etitanic))
# Build a caret dummy-variable encoder from the same formula, then apply it
# to the data with predict().
dummies <- dummyVars(survived ~ ., data = etitanic)
head(predict(dummies, newdata = etitanic))
# Load the mdrr example data; the statements below read mdrrDescr
# (presumably supplied by this data set).
data(mdrr)
# Frequency table of the single descriptor nR11.
data.frame(table(mdrrDescr$nR11))
# Ask nearZeroVar() for its per-predictor metrics table rather than indices,
# then display the first ten rows that are flagged in the `nzv` column.
nzv <- nearZeroVar(mdrrDescr, saveMetrics = TRUE)
nzv[nzv$nzv, ][1:10, ]
dim(mdrrDescr)
[1] 528 342
# Column indices of the near-zero-variance predictors.
nzv <- nearZeroVar(mdrrDescr)
# Only drop columns when something was flagged: negative indexing with an
# empty vector selects *no* columns, which would silently discard every
# predictor. drop = FALSE keeps the two-dimensional structure even if a single
# column remains.
filteredDescr <- if (length(nzv) > 0) {
  mdrrDescr[, -nzv, drop = FALSE]
} else {
  mdrrDescr
}
dim(filteredDescr)
[1] 528 297
# Pairwise correlations between the remaining predictors. The matrix is
# computed once; the original recomputed the identical matrix a second time.
descrCor <- cor(filteredDescr)
# Number of predictor pairs whose absolute correlation exceeds 0.999.
highCorr <- sum(abs(descrCor[upper.tri(descrCor)]) > .999)
# Distribution of the pairwise correlations (upper triangle only, so each
# pair is counted once and the diagonal is excluded).
summary(descrCor[upper.tri(descrCor)])
    Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
-0.99610 -0.05373  0.25010  0.26080  0.65530  1.00000 
# Columns that findCorrelation() suggests removing at a 0.75 cutoff.
highlyCorDescr <- findCorrelation(descrCor, cutoff = .75)
# Guard the negative index: if nothing was flagged, `-integer(0)` would select
# zero columns and wipe out the whole predictor set. drop = FALSE keeps the
# two-dimensional structure even if only one column is left.
if (length(highlyCorDescr) > 0) {
  filteredDescr <- filteredDescr[, -highlyCorDescr, drop = FALSE]
}
# Re-check the correlation distribution after filtering.
descrCor2 <- cor(filteredDescr)
summary(descrCor2[upper.tri(descrCor2)])
    Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
-0.70730 -0.05378  0.04418  0.06692  0.18860  0.74460 
# 6 x 6 design matrix, written out column by column (matrix() fills in
# column-major order, so each line of the literal below is one column).
ltfrDesign <- matrix(
  c(
    1, 1, 1, 1, 1, 1,
    1, 1, 1, 0, 0, 0,
    0, 0, 0, 1, 1, 1,
    1, 0, 0, 1, 0, 0,
    0, 1, 0, 0, 1, 0,
    0, 0, 1, 0, 0, 1
  ),
  nrow = 6,
  ncol = 6
)
# Look for linear dependencies among the columns and print what was found.
comboInfo <- findLinearCombos(ltfrDesign)
comboInfo
$linearCombos
$linearCombos[[1]]
[1] 3 1 2

$linearCombos[[2]]
[1] 6 1 4 5


$remove
[1] 3 6
# Show the design matrix without the columns listed in comboInfo's `remove`
# element.
ltfrDesign[, -comboInfo[["remove"]]]
     [,1] [,2] [,3] [,4]
[1,]    1    1    1    0
[2,]    1    1    0    1
[3,]    1    1    0    0
[4,]    1    0    1    0
[5,]    1    0    0    1
[6,]    1    0    0    0
# Fix the RNG seed so the random split below is reproducible.
set.seed(96)
# Draw half of the sample indices for the training set. seq_along() replaces
# seq(along = ...), which only worked through partial matching of the
# `along.with` argument; the generated index vector is the same.
inTrain <- sample(seq_along(mdrrClass), length(mdrrClass) / 2)
# Split predictors and class labels with the same index vector.
training <- filteredDescr[inTrain, ]
test <- filteredDescr[-inTrain, ]
trainMDRR <- mdrrClass[inTrain]
testMDRR <- mdrrClass[-inTrain]
# Estimate centering and scaling from the training rows only, then apply the
# same transformation object to both the training and the test predictors.
preProcValues <- preProcess(training, method = c("center", "scale"))
trainTransformed <- predict(preProcValues, training)
testTransformed <- predict(preProcValues, test)
library(AppliedPredictiveModeling)
transparentTheme(trans = 0.4)
# Standardize the two descriptors that are plotted against each other.
plotSubset <- data.frame(scale(mdrrDescr[, c("nC", "X4v")]))
# Scatter plot of the scaled descriptors, grouped by mdrrClass.
xyplot(
  nC ~ X4v,
  data = plotSubset,
  groups = mdrrClass,
  auto.key = list(columns = 2)
)

# Apply spatialSign() to the scaled descriptors and convert the result to a
# data frame so it can be passed to xyplot() via `data`.
transformed <- as.data.frame(spatialSign(plotSubset))
# Same scatter plot as before, now on the transformed values.
xyplot(
  nC ~ X4v,
  data = transformed,
  groups = mdrrClass,
  auto.key = list(columns = 2)
)

# Estimate the Box-Cox transformation on the training predictors only.
preProcValues2 <- preProcess(training, method = "BoxCox")
# Apply that single fitted transformation to both data sets.
trainBC <- predict(preProcValues2, newdata = training)
testBC <- predict(preProcValues2, newdata = test)
# Print the fitted pre-processing object.
preProcValues2
Created from 264 samples and 31 variables

Pre-processing:
  - Box-Cox transformation (31)
  - ignored (0)

Lambda estimates for Box-Cox transformation:
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
-2.0000 -0.2000  0.3000  0.4097  1.7000  2.0000 
# Attach the package again (it is also attached earlier in this script) and
# load the schedulingData example data set, then show its structure.
library(AppliedPredictiveModeling)
data(schedulingData)
str(schedulingData)
'data.frame':   4331 obs. of  8 variables:
 $ Protocol   : Factor w/ 14 levels "A","C","D","E",..: 4 4 4 4 4 4 4 4 4 4 ...
 $ Compounds  : num  997 97 101 93 100 100 105 98 101 95 ...
 $ InputFields: num  137 103 75 76 82 82 88 95 91 92 ...
 $ Iterations : num  20 20 10 20 20 20 20 20 20 20 ...
 $ NumPending : num  0 0 0 0 0 0 0 0 0 0 ...
 $ Hour       : num  14 13.8 13.8 10.1 10.4 ...
 $ Day        : Factor w/ 7 levels "Mon","Tue","Wed",..: 2 2 4 5 5 3 5 5 5 3 ...
 $ Class      : Factor w/ 4 levels "VF","F","M","L": 2 1 1 1 1 1 1 1 1 1 ...
# Fit centering, scaling and a Yeo-Johnson transformation on every column
# except the 8th one, then print the fitted object.
pp_hpc <- preProcess(
  schedulingData[, -8],
  method = c("center", "scale", "YeoJohnson")
)
pp_hpc
Created from 4331 samples and 7 variables

Pre-processing:
  - centered (5)
  - ignored (2)
  - scaled (5)
  - Yeo-Johnson transformation (5)

Lambda estimates for Yeo-Johnson transformation:
-0.08, -0.03, -1.05, -1.1, 1.44
# Apply the fitted pre-processing to the same columns it was estimated from
# and preview the first rows of the result.
transformed <- predict(pp_hpc, newdata = schedulingData[, -8])
head(transformed)
# Proportion of rows whose NumPending value is exactly zero.
mean(schedulingData[["NumPending"]] == 0)
[1] 0.7561764
# Same pre-processing as pp_hpc, with the additional "nzv" method listed,
# fitted on every column except the 8th one; print the fitted object.
pp_no_nzv <- preProcess(
  schedulingData[, -8],
  method = c("center", "scale", "YeoJohnson", "nzv")
)
pp_no_nzv
Created from 4331 samples and 7 variables

Pre-processing:
  - centered (4)
  - ignored (2)
  - removed (1)
  - scaled (4)
  - Yeo-Johnson transformation (4)

Lambda estimates for Yeo-Johnson transformation:
-0.08, -0.03, -1.05, 1.44
# Apply pp_no_nzv to the first six rows (8th column excluded) and print the
# result.
predict(pp_no_nzv, newdata = schedulingData[seq_len(6), -8])
# Fit classDist() on the Box-Cox transformed training predictors and their
# class labels.
centroids <- classDist(trainBC, trainMDRR)
# Predict on the transformed test predictors and convert the result to a data
# frame in one step.
distances <- as.data.frame(predict(centroids, testBC))
head(distances)
# Plot the two distance columns against each other, grouped by the test-set
# class labels.
xyplot(
  dist.Active ~ dist.Inactive,
  data = distances,
  groups = testMDRR,
  auto.key = list(columns = 2)
)

LS0tDQp0aXRsZTogIlIgTm90ZWJvb2sgXyBQcmUtUHJvY2Vzc2luZyINCm91dHB1dDogaHRtbF9ub3RlYm9vaw0KLS0tDQoNClRoaXMgaXMgYW4gW1IgTWFya2Rvd25dKGh0dHA6Ly9ybWFya2Rvd24ucnN0dWRpby5jb20pIE5vdGVib29rLiBXaGVuIHlvdSBleGVjdXRlIGNvZGUgd2l0aGluIHRoZSBub3RlYm9vaywgdGhlIHJlc3VsdHMgYXBwZWFyIGJlbmVhdGggdGhlIGNvZGUuIA0KDQpUcnkgZXhlY3V0aW5nIHRoaXMgY2h1bmsgYnkgY2xpY2tpbmcgdGhlICpSdW4qIGJ1dHRvbiB3aXRoaW4gdGhlIGNodW5rIG9yIGJ5IHBsYWNpbmcgeW91ciBjdXJzb3IgaW5zaWRlIGl0IGFuZCBwcmVzc2luZyAqQ3RybCtTaGlmdCtFbnRlciouIA0KDQpgYGB7cn0NCmxpYnJhcnkoY2FyZXQpDQpgYGANCg0KQWRkIGEgbmV3IGNodW5rIGJ5IGNsaWNraW5nIHRoZSAqSW5zZXJ0IENodW5rKiBidXR0b24gb24gdGhlIHRvb2xiYXIgb3IgYnkgcHJlc3NpbmcgKkN0cmwrQWx0K0kqLg0KDQpXaGVuIHlvdSBzYXZlIHRoZSBub3RlYm9vaywgYW4gSFRNTCBmaWxlIGNvbnRhaW5pbmcgdGhlIGNvZGUgYW5kIG91dHB1dCB3aWxsIGJlIHNhdmVkIGFsb25nc2lkZSBpdCAoY2xpY2sgdGhlICpQcmV2aWV3KiBidXR0b24gb3IgcHJlc3MgKkN0cmwrU2hpZnQrSyogdG8gcHJldmlldyB0aGUgSFRNTCBmaWxlKS4NCg0KYGBge3IgTGliTG9hZH0NCmxpYnJhcnkoZWFydGgpDQpkYXRhKGV0aXRhbmljKQ0KaGVhZChtb2RlbC5tYXRyaXgoc3Vydml2ZWQgfiAuLCBkYXRhID0gZXRpdGFuaWMpKQ0KYGBgDQoNCmBgYHtyIFVzaW5nICBkdW1teVZhcnN9DQpkdW1taWVzIDwtIGR1bW15VmFycyhzdXJ2aXZlZCB+IC4sIGRhdGEgPSBldGl0YW5pYykNCmhlYWQocHJlZGljdChkdW1taWVzLCBuZXdkYXRhID0gZXRpdGFuaWMpKQ0KYGBgDQoNCmBgYHtyIFplcm8tIGFuZCBOZWFyIFplcm8tVmFyaWFuY2UgUHJlZGljdG9yc30NCmRhdGEobWRycikNCmRhdGEuZnJhbWUodGFibGUobWRyckRlc2NyJG5SMTEpKQ0Kbnp2IDwtIG5lYXJaZXJvVmFyKG1kcnJEZXNjciwgc2F2ZU1ldHJpY3M9IFRSVUUpDQpuenZbbnp2JG56dixdWzE6MTAsXQ0KZGltKG1kcnJEZXNjcikNCm56diA8LSBuZWFyWmVyb1ZhcihtZHJyRGVzY3IpDQpmaWx0ZXJlZERlc2NyIDwtIG1kcnJEZXNjclssIC1uenZdDQpkaW0oZmlsdGVyZWREZXNjcikNCmBgYA0KDQpgYGB7ciBJZGVudGlmeWluZyBDb3JyZWxhdGVkIFByZWRpY3RvcnN9DQpkZXNjckNvciA8LSAgY29yKGZpbHRlcmVkRGVzY3IpDQpoaWdoQ29yciA8LSBzdW0oYWJzKGRlc2NyQ29yW3VwcGVyLnRyaShkZXNjckNvcildKSA+IC45OTkpDQpkZXNjckNvciA8LSBjb3IoZmlsdGVyZWREZXNjcikNCnN1bW1hcnkoZGVzY3JDb3JbdXBwZXIudHJpKGRlc2NyQ29yKV0pDQpoaWdobHlDb3JEZXNjciA8LSBmaW5kQ29ycmVsYXRpb24oZGVzY3JDb3IsIGN1dG9mZiA9IC43NSkNCmZpbHRlcmVkRGVzY3IgPC0gZmlsdGVyZWREZXNjclssLWhpZ2hseUNvckRl
c2NyXQ0KZGVzY3JDb3IyIDwtIGNvcihmaWx0ZXJlZERlc2NyKQ0Kc3VtbWFyeShkZXNjckNvcjJbdXBwZXIudHJpKGRlc2NyQ29yMildKQ0KYGBgDQoNCmBgYHtyIExpbmVhciBEZXBlbmRlbmNpZXN9DQpsdGZyRGVzaWduIDwtIG1hdHJpeCgwLCBucm93PTYsIG5jb2w9NikNCmx0ZnJEZXNpZ25bLDFdIDwtIGMoMSwgMSwgMSwgMSwgMSwgMSkNCmx0ZnJEZXNpZ25bLDJdIDwtIGMoMSwgMSwgMSwgMCwgMCwgMCkNCmx0ZnJEZXNpZ25bLDNdIDwtIGMoMCwgMCwgMCwgMSwgMSwgMSkNCmx0ZnJEZXNpZ25bLDRdIDwtIGMoMSwgMCwgMCwgMSwgMCwgMCkNCmx0ZnJEZXNpZ25bLDVdIDwtIGMoMCwgMSwgMCwgMCwgMSwgMCkNCmx0ZnJEZXNpZ25bLDZdIDwtIGMoMCwgMCwgMSwgMCwgMCwgMSkNCmNvbWJvSW5mbyA8LSBmaW5kTGluZWFyQ29tYm9zKGx0ZnJEZXNpZ24pDQpjb21ib0luZm8NCmx0ZnJEZXNpZ25bLCAtY29tYm9JbmZvJHJlbW92ZV0NCmBgYA0KDQpgYGB7ciBDZW50ZXJpbmcgYW5kIFNjYWxpbmd9DQpzZXQuc2VlZCg5NikNCmluVHJhaW4gPC0gc2FtcGxlKHNlcShhbG9uZyA9IG1kcnJDbGFzcyksIGxlbmd0aChtZHJyQ2xhc3MpLzIpDQoNCnRyYWluaW5nIDwtIGZpbHRlcmVkRGVzY3JbaW5UcmFpbixdDQp0ZXN0IDwtIGZpbHRlcmVkRGVzY3JbLWluVHJhaW4sXQ0KdHJhaW5NRFJSIDwtIG1kcnJDbGFzc1tpblRyYWluXQ0KdGVzdE1EUlIgPC0gbWRyckNsYXNzWy1pblRyYWluXQ0KDQpwcmVQcm9jVmFsdWVzIDwtIHByZVByb2Nlc3ModHJhaW5pbmcsIG1ldGhvZCA9IGMoImNlbnRlciIsICJzY2FsZSIpKQ0KDQp0cmFpblRyYW5zZm9ybWVkIDwtIHByZWRpY3QocHJlUHJvY1ZhbHVlcywgdHJhaW5pbmcpDQp0ZXN0VHJhbnNmb3JtZWQgPC0gcHJlZGljdChwcmVQcm9jVmFsdWVzLCB0ZXN0KQ0KYGBgDQoNCmBgYHtyIFRyYW5zZm9ybWluZyBQcmVkaWN0b3JzfQ0KbGlicmFyeShBcHBsaWVkUHJlZGljdGl2ZU1vZGVsaW5nKQ0KdHJhbnNwYXJlbnRUaGVtZSh0cmFucyA9IC40KQ0KcGxvdFN1YnNldCA8LSBkYXRhLmZyYW1lKHNjYWxlKG1kcnJEZXNjclssIGMoIm5DIiwgIlg0diIpXSkpIA0KeHlwbG90KG5DIH4gWDR2LA0KICAgICAgIGRhdGEgPSBwbG90U3Vic2V0LA0KICAgICAgIGdyb3VwcyA9IG1kcnJDbGFzcywgDQogICAgICAgYXV0by5rZXkgPSBsaXN0KGNvbHVtbnMgPSAyKSkgIA0KdHJhbnNmb3JtZWQgPC0gc3BhdGlhbFNpZ24ocGxvdFN1YnNldCkNCnRyYW5zZm9ybWVkIDwtIGFzLmRhdGEuZnJhbWUodHJhbnNmb3JtZWQpDQp4eXBsb3QobkMgfiBYNHYsIA0KICAgICAgIGRhdGEgPSB0cmFuc2Zvcm1lZCwgDQogICAgICAgZ3JvdXBzID0gbWRyckNsYXNzLCANCiAgICAgICBhdXRvLmtleSA9IGxpc3QoY29sdW1ucyA9IDIpKSANCnByZVByb2NWYWx1ZXMyIDwtIHByZVByb2Nlc3ModHJhaW5pbmcsIG1ldGhvZCA9ICJCb3hDb3giKQ0KdHJhaW5CQyA8LSBwcmVkaWN0KHByZVByb2NWYWx1ZXMyLCB0cmFpbmluZykNCnRlc3RC
QyA8LSBwcmVkaWN0KHByZVByb2NWYWx1ZXMyLCB0ZXN0KQ0KcHJlUHJvY1ZhbHVlczINCmBgYA0KDQpgYGB7ciBQdXR0aW5nIEl0IEFsbCBUb2dldGhlcn0NCmxpYnJhcnkoQXBwbGllZFByZWRpY3RpdmVNb2RlbGluZykNCmRhdGEoc2NoZWR1bGluZ0RhdGEpDQpzdHIoc2NoZWR1bGluZ0RhdGEpDQpwcF9ocGMgPC0gcHJlUHJvY2VzcyhzY2hlZHVsaW5nRGF0YVssIC04XSwgDQogICAgICAgICAgICAgICAgICAgICBtZXRob2QgPSBjKCJjZW50ZXIiLCAic2NhbGUiLCAiWWVvSm9obnNvbiIpKQ0KcHBfaHBjDQp0cmFuc2Zvcm1lZCA8LSBwcmVkaWN0KHBwX2hwYywgbmV3ZGF0YSA9IHNjaGVkdWxpbmdEYXRhWywgLThdKQ0KaGVhZCh0cmFuc2Zvcm1lZCkNCm1lYW4oc2NoZWR1bGluZ0RhdGEkTnVtUGVuZGluZyA9PSAwKQ0KcHBfbm9fbnp2IDwtIHByZVByb2Nlc3Moc2NoZWR1bGluZ0RhdGFbLCAtOF0sIA0KICAgICAgICAgICAgICAgICAgICAgICAgbWV0aG9kID0gYygiY2VudGVyIiwgInNjYWxlIiwgIlllb0pvaG5zb24iLCAibnp2IikpDQpwcF9ub19uenYNCnByZWRpY3QocHBfbm9fbnp2LCBuZXdkYXRhID0gc2NoZWR1bGluZ0RhdGFbMTo2LCAtOF0pDQpgYGANCg0KYGBge3IgQ2xhc3MgRGlzdGFuY2UgQ2FsY3VsYXRpb25zfQ0KY2VudHJvaWRzIDwtIGNsYXNzRGlzdCh0cmFpbkJDLCB0cmFpbk1EUlIpDQpkaXN0YW5jZXMgPC0gcHJlZGljdChjZW50cm9pZHMsIHRlc3RCQykNCmRpc3RhbmNlcyA8LSBhcy5kYXRhLmZyYW1lKGRpc3RhbmNlcykNCmhlYWQoZGlzdGFuY2VzKQ0KeHlwbG90KGRpc3QuQWN0aXZlIH4gZGlzdC5JbmFjdGl2ZSwNCiAgICAgICBkYXRhID0gZGlzdGFuY2VzLCANCiAgICAgICBncm91cHMgPSB0ZXN0TURSUiwgDQogICAgICAgYXV0by5rZXkgPSBsaXN0KGNvbHVtbnMgPSAyKSkNCmBgYA0KDQo=