Inicializa
# Attach ISLR, which provides the `Auto` data set used throughout.
library(ISLR)
# Show the column layout of `Auto` before modelling.
str(Auto)
## 'data.frame': 392 obs. of 9 variables:
## $ mpg : num 18 15 18 16 17 15 14 14 14 15 ...
## $ cylinders : num 8 8 8 8 8 8 8 8 8 8 ...
## $ displacement: num 307 350 318 304 302 429 454 440 455 390 ...
## $ horsepower : num 130 165 150 150 140 198 220 215 225 190 ...
## $ weight : num 3504 3693 3436 3433 3449 ...
## $ acceleration: num 12 11.5 11 12 10.5 10 9 8.5 10 8.5 ...
## $ year : num 70 70 70 70 70 70 70 70 70 70 ...
## $ origin : num 1 1 1 1 1 1 1 1 1 1 ...
## $ name : Factor w/ 304 levels "amc ambassador brougham",..: 49 36 231 14 161 141 54 223 241 2 ...
# Variable roles: the first column of `Auto` is the response and every column
# between the second and the next-to-last one is a candidate predictor (the
# last column is left out). The single response name is recycled on each row.
variables <- data.frame(
  dependiente = names(Auto)[[1]],
  independientes = names(Auto)[seq(2, ncol(Auto) - 1)]
)

# Reproducible training split: row indices sampled without replacement,
# sized at 70% of the rows of `Auto`.
set.seed(1)
train <- sample(nrow(Auto), size = nrow(Auto) * 0.7)
Función
#' Forward stepwise selection of linear-model predictors by R-squared
#'
#' Starting from a formula prefix, repeatedly fits one `lm()` per remaining
#' candidate predictor (prefix + candidate), keeps the candidate whose model
#' has the highest R-squared, appends it to the prefix and continues until no
#' candidates remain. Each selection is reported with `print()`.
#'
#' @param dependiente Name of the response variable (length-one character or
#'   factor).
#' @param independientes Names of the candidate predictors (character or
#'   factor).
#' @param dependiente_f Formula prefix to extend, e.g. `"mpg~weight+"`. The
#'   value `0` (the default) means "start from `<dependiente>~`".
#' @param datos Data frame holding the variables. Defaults to the `Auto`
#'   object visible from the calling environment, as the original code did.
#' @param filas Row selector (anything accepted by `datos[filas, ]`) for the
#'   rows used to fit the models. Defaults to the `train` object visible from
#'   the calling environment.
#' @return A data frame with one row per selection step, in selection order,
#'   and columns `independientes` (predictor added), `r2`, `rss`, `ar2`
#'   (R-squared, residual sum of squares and adjusted R-squared of the model
#'   after adding it) and `formu` (the formula text that was fitted). Row
#'   names are the step numbers. Zero rows when there are no candidates.
stepwise_forward <- function(dependiente, independientes, dependiente_f = 0,
                             datos = Auto, filas = train) {
  dependiente <- as.character(dependiente)
  candidatas <- as.character(independientes)
  stopifnot(length(dependiente) == 1L)

  # Nothing to select: return an empty frame with the usual columns instead
  # of failing inside the fitting loop.
  if (length(candidatas) == 0L) {
    return(data.frame(
      independientes = character(0),
      r2 = numeric(0),
      rss = numeric(0),
      ar2 = numeric(0),
      formu = character(0),
      stringsAsFactors = FALSE
    ))
  }

  stopifnot(is.data.frame(datos))

  # `0` is the historical sentinel for "no prefix supplied yet".
  if (is.null(dependiente_f) || isTRUE(dependiente_f == 0)) {
    prefijo <- paste0(dependiente, "~")
  } else {
    prefijo <- as.character(dependiente_f)
  }

  # Restrict the data once, up front, so every fit sees the same rows.
  datos_ajuste <- datos[filas, , drop = FALSE]

  pasos <- vector("list", length(candidatas))
  paso <- 0L

  while (length(candidatas) > 0L) {
    # Default `sep = " "` is kept on purpose so the formula text (and the
    # printed messages) stay identical to previous runs, e.g. "mpg~ weight".
    formulas <- paste(prefijo, candidatas)

    r2 <- numeric(length(candidatas))
    rss <- numeric(length(candidatas))
    ar2 <- numeric(length(candidatas))
    for (i in seq_along(candidatas)) {
      ajuste <- stats::lm(stats::as.formula(formulas[i]), data = datos_ajuste)
      resumen <- summary(ajuste)
      r2[i] <- resumen$r.squared
      rss[i] <- stats::deviance(ajuste)
      ar2[i] <- resumen$adj.r.squared
    }

    # which.max() picks exactly one winner (the first on ties) and avoids
    # locating the best row through floating-point equality.
    mejor <- which.max(r2)
    if (length(mejor) == 0L) {
      stop("no candidate model produced a usable R-squared", call. = FALSE)
    }
    elegido <- candidatas[mejor]

    print(paste("selecciona:", elegido))
    print(paste("formula:", formulas[mejor]))

    paso <- paso + 1L
    pasos[[paso]] <- data.frame(
      independientes = elegido,
      r2 = r2[mejor],
      rss = rss[mejor],
      ar2 = ar2[mejor],
      formu = formulas[mejor],
      stringsAsFactors = FALSE
    )

    # Grow the prefix and drop the chosen predictor (by value, so repeated
    # names in the input are not fitted twice).
    prefijo <- paste0(prefijo, elegido, "+")
    candidatas <- candidatas[candidatas != elegido]
  }

  resultado <- do.call(rbind, pasos[seq_len(paso)])
  rownames(resultado) <- NULL
  resultado
}
Llamada a la función
# Run the forward selection: the response name comes from `variables`, every
# listed predictor is a candidate, and 0 asks for a fresh formula prefix.
modelos <- stepwise_forward(
  dependiente = unique(variables$dependiente),
  independientes = variables$independientes,
  dependiente_f = 0
)
## [1] "selecciona: weight"
## [1] "formula: mpg~ weight"
## [1] "selecciona: year"
## [1] "formula: mpg~weight+ year"
## [1] "selecciona: origin"
## [1] "formula: mpg~weight+year+ origin"
## [1] "selecciona: horsepower"
## [1] "formula: mpg~weight+year+origin+ horsepower"
## [1] "selecciona: displacement"
## [1] "formula: mpg~weight+year+origin+horsepower+ displacement"
## [1] "selecciona: cylinders"
## [1] "formula: mpg~weight+year+origin+horsepower+displacement+ cylinders"
## [1] "selecciona: acceleration"
## [1] "formula: mpg~weight+year+origin+horsepower+displacement+cylinders+ acceleration"
Resultados
# Inspect the structure (column names and types) of the selection result.
str(modelos)
## 'data.frame': 7 obs. of 5 variables:
## $ independientes: Factor w/ 7 levels "acceleration",..: 6 7 5 4 3 2 1
## $ r2 : num 0.691 0.806 0.815 0.817 0.818 ...
## $ rss : num 5099 3201 3047 3022 3007 ...
## $ ar2 : num 0.69 0.804 0.813 0.814 0.814 ...
## $ formu : Factor w/ 7 levels "mpg~ weight",..: 1 2 3 4 5 6 7
# Display the full table of selection steps.
print(modelos)
## independientes r2 rss ar2
## r24 weight 0.6906605 5099.450 0.6895232
## r25 year 0.8058003 3201.375 0.8043671
## r251 origin 0.8151730 3046.866 0.8131194
## r23 horsepower 0.8167047 3021.616 0.8139791
## r22 displacement 0.8175892 3007.036 0.8141860
## r21 cylinders 0.8194478 2976.396 0.8153905
## r2 acceleration 0.8194568 2976.248 0.8147057
## formu
## r24 mpg~ weight
## r25 mpg~weight+ year
## r251 mpg~weight+year+ origin
## r23 mpg~weight+year+origin+ horsepower
## r22 mpg~weight+year+origin+horsepower+ displacement
## r21 mpg~weight+year+origin+horsepower+displacement+ cylinders
## r2 mpg~weight+year+origin+horsepower+displacement+cylinders+ acceleration