This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.
library(caret)
library(mlbench)
library(Hmisc)
Carico il pacchetto richiesto: survival
Attaching package: 㤼㸱survival㤼㸲
The following object is masked from 㤼㸱package:caret㤼㸲:
cluster
Carico il pacchetto richiesto: Formula
Attaching package: 㤼㸱Hmisc㤼㸲
The following objects are masked from 㤼㸱package:TeachingDemos㤼㸲:
cnvrt.coords, subplot
The following objects are masked from 㤼㸱package:base㤼㸲:
format.pval, round.POSIXt, trunc.POSIXt, units
library(randomForest)
randomForest 4.6-12
Type rfNews() to see new features/changes/bug fixes.
Attaching package: 㤼㸱randomForest㤼㸲
The following object is masked from 㤼㸱package:Hmisc㤼㸲:
combine
The following object is masked from 㤼㸱package:ggplot2㤼㸲:
margin
Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.
When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).
n <- 100
p <- 40
sigma <- 1
set.seed(1)
sim <- mlbench.friedman1(n, sd = sigma)
colnames(sim$x) <- c(paste("real", 1:5, sep = ""),
paste("bogus", 1:5, sep = ""))
bogus <- matrix(rnorm(n * p), nrow = n)
colnames(bogus) <- paste("bogus", 5+(1:ncol(bogus)), sep = "")
x <- cbind(sim$x, bogus)
y <- sim$y
normalization <- preProcess(x)
x <- predict(normalization, x)
x <- as.data.frame(x)
subsets <- c(1:5, 10, 15, 20, 25)
set.seed(10)
ctrl <- rfeControl(functions = lmFuncs,
method = "repeatedcv",
repeats = 5,
verbose = FALSE)
lmProfile <- rfe(x, y,
sizes = subsets,
rfeControl = ctrl)
lmProfile
predictors(lmProfile)
lmProfile$fit
head(lmProfile$resample)
trellis.par.set(caretTheme())
plot(lmProfile, type = c("g", "o"))
rfRFE <- list(summary = defaultSummary,
fit = function(x, y, first, last, ...){
library(randomForest)
randomForest(x, y, importance = first, ...)
},
pred = function(object, x) predict(object, x),
rank = function(object, x, y) {
vimp <- varImp(object)
vimp <- vimp[order(vimp$Overall,decreasing = TRUE),,drop = FALSE]
vimp$var <- rownames(vimp)
vimp
},
selectSize = pickSizeBest,
selectVar = pickVars)
ctrl$functions <- rfRFE
ctrl$returnResamp <- "all"
set.seed(10)
rfProfile <- rfe(x, y, sizes = subsets, rfeControl = ctrl)
rfProfile
trellis.par.set(caretTheme())
plot1 <- plot(rfProfile, type = c("g", "o"))
plot2 <- plot(rfProfile, type = c("g", "o"), metric = "Rsquared")
print(plot1, split=c(1,1,1,2), more=TRUE)
print(plot2, split=c(1,2,1,2))
plot1 <- xyplot(rfProfile,
type = c("g", "p", "smooth"),
ylab = "RMSE CV Estimates")
plot2 <- densityplot(rfProfile,
subset = Variables < 5,
adjust = 1.25,
as.table = TRUE,
xlab = "RMSE CV Estimates",
pch = "|")
print(plot1, split=c(1,1,1,2), more=TRUE)
print(plot2, split=c(1,2,1,2))