library(MASS)
library(RCurl)
stepwiseReg <- function(data, direction){
null <- lm(y~1, data)
full <- lm(y~., data)
stepF <- stepAIC(null,
scope=list(lower=null, upper=full),
direction=direction, trace=FALSE)
return(stepF)
}
##
get_random_vec <- function(N=100, x_name="x", min=-1, max=1){
return (runif(N, min, max))
}
get_randomwalk_vec <- function(N=100, x_name="x", min=-1, max=1){
x_ <- 0
v <- c()
for(i in 1:N){
x_ <- x_ + runif(1, min, max)
v <- append(v, x_)
}
return (v)
}
simulate <- function(N=100, k=10, M=10){
df <- data.frame("V1"=c(1:N))
for (i in 1:(k+M)){
df[, i] <- get_random_vec(N,min=-2.5, max=2.5)
}
vars <- colnames(df)
features <- sample(vars, k)
weights <- c(runif(k, -5, 5))
intercept <- runif(1, 5,50)
y <- rep(0, N)
for (f in 1:k){
y <- y + weights[f] * df[,features[f]] + runif(N, -1,1)
}
y <- y + intercept
df$y <- y
bound <- floor((nrow(df)/2))
df <- df[sample(nrow(df)), ] #shuffle
df.train <- df[1:bound, ] #get training set
df.test <- df[(bound+1):nrow(df), ] #get test set
m <- stepwiseReg(data=df.train, direction="both")
# remove intercept
est_coefs <- c(names(m$coefficients))
est_coefs <- est_coefs[est_coefs!="(Intercept)"]
# ratio of correct selections
r <- length(intersect(est_coefs, features)) / length(est_coefs)
# sum-of-squared errors
sse = sum((m$fitted.values - y)^2)
return (c(round(r, 2), round(sse,2)))
}
# Play
runs_count = 100
df_results <- data.frame(matrix(ncol = 2, nrow = 0))
colnames(df_results) <- c('rcs', 'sse')
for (i in 1:runs_count){
r <- nrow(df_results) + 1
df_results[r,] <- simulate(N=200, k=10, M=20)
}
plot(df_results$rcs, ylim = c(0,1), ylab='Ratio of Correct Selections')

plot(df_results$sse, ylab='SSE')
