library(MASS)
library(RCurl)

stepwiseReg <- function(data, direction){
  null <- lm(y~1, data)
  full <- lm(y~., data)
  stepF <- stepAIC(null, 
                   scope=list(lower=null, upper=full), 
                   direction=direction, trace=FALSE)
  return(stepF)
}


##
get_random_vec <- function(N=100, x_name="x", min=-1, max=1){
  return (runif(N, min, max))
}
get_randomwalk_vec <- function(N=100, x_name="x", min=-1, max=1){
  x_ <- 0
  v <- c()
  for(i in 1:N){
    x_ <- x_ + runif(1, min, max)
    v <- append(v, x_)
  }
  return (v)
}

simulate <- function(N=100, k=10, M=10){
  df <- data.frame("V1"=c(1:N))
  for (i in 1:(k+M)){
    df[, i] <- get_random_vec(N,min=-2.5, max=2.5)
  }
  vars <- colnames(df)
  features <- sample(vars, k)
  weights <- c(runif(k, -5, 5))
  intercept <- runif(1, 5,50)
  y <- rep(0, N)
  for (f in 1:k){
    y <- y + weights[f] * df[,features[f]] +  runif(N, -1,1)
  }
  y <- y + intercept 
  df$y <- y
  bound <- floor((nrow(df)/2))
  df <- df[sample(nrow(df)), ]           #shuffle 
  df.train <- df[1:bound, ]              #get training set
  df.test <- df[(bound+1):nrow(df), ]    #get test set
  m <- stepwiseReg(data=df.train, direction="both")
  # remove intercept
  est_coefs <- c(names(m$coefficients))
  est_coefs <- est_coefs[est_coefs!="(Intercept)"]
  # ratio of correct selections
  r <- length(intersect(est_coefs, features)) / length(est_coefs)
  # sum-of-squared errors
  sse = sum((m$fitted.values - y)^2)
  return (c(round(r, 2), round(sse,2)))
}
# Play
runs_count = 100
df_results <- data.frame(matrix(ncol = 2, nrow = 0))
colnames(df_results) <- c('rcs', 'sse')
for (i in 1:runs_count){
  r <- nrow(df_results) + 1
  df_results[r,] <- simulate(N=200, k=10, M=20)
}
plot(df_results$rcs, ylim = c(0,1), ylab='Ratio of Correct Selections')

plot(df_results$sse, ylab='SSE')