# Lekshman Ramesh
#' Drop columns whose contents duplicate a later column
#'
#' Two columns count as duplicates when their contents are exactly equal
#' (values, type and attributes); column names are not compared. Within each
#' group of duplicated columns only the right-most one is kept, which matches
#' the outcome of a pairwise `identical()` scan that discards the earlier
#' column of every matching pair.
#'
#' @param train A data frame.
#' @return `train` without the redundant columns. The result is always a data
#'   frame, including when zero or one column remains.
RemoveIdenticalCol <- function(train) {
  # duplicated() on a list compares whole elements, so each column is treated
  # as a single value; fromLast = TRUE flags every copy except the last one.
  # This also copes with 0- and 1-column inputs, where an index loop built
  # from 1:(ncol - 1) would run off the end.
  is_repeat <- duplicated(as.list(train), fromLast = TRUE)

  # Select by position rather than by name so that a column which merely
  # shares its name with a duplicate is not removed, and keep drop = FALSE so
  # a single surviving column is not collapsed to a bare vector.
  train[, !is_repeat, drop = FALSE]
}

train <- RemoveIdenticalCol(train)
# Report the (rows, columns) of `train` at this point in the script. The
# trailing `##` comment looks like console output captured from an earlier
# run -- it is not recomputed here, so treat it as indicative only.
dim(train)## [1] 76020 309
#' Count the most frequent value of a vector
#'
#' Missing values are tallied as their own category, so `NA` can be the most
#' frequent value. When several values tie, the first one in `table()` order
#' is used.
#'
#' @param x A vector.
#' @return A 1 x 2 matrix with columns `check2` (number of occurrences of the
#'   most frequent value) and `len` (`length(x)`). The row is labelled with
#'   the most frequent value.
repvalues <- function(x) {
  tally <- table(x, useNA = "always")
  top_count <- tally[which.max(tally)]
  cbind(check2 = top_count, len = length(x))
}
#' Select variables that are not dominated by a single value
#'
#' For every column of `x`, computes the share of entries taken up by the
#' column's most frequent value (as counted by `repvalues()`, which treats
#' `NA` as a value) and returns the columns whose share does not exceed `n`.
#' Note that, despite the function's name, the result lists the columns that
#' pass the filter, i.e. the ones the caller keeps.
#'
#' @param x A data frame (or list of vectors).
#' @param n Largest acceptable share of the most frequent value; columns with
#'   a share above `n` are left out of the result.
#' @return A named numeric vector with one share per retained column, named
#'   after the column. Columns whose share cannot be computed (zero-length
#'   columns) are left out.
useless_var <- function(x, n) {
  # vapply() pins the result to a 2-row matrix (mode count, length) even when
  # `x` has no columns; sapply() would return an empty list there and the row
  # indexing below would fail.
  mode_count <- vapply(
    x,
    function(column) as.numeric(repvalues(column)),
    numeric(2)
  )
  red_prop <- mode_count[1, ] / mode_count[2, ]

  # A zero-length column gives 0 / 0 = NaN. which() skips the resulting NA
  # comparison instead of producing NA-named entries in the output.
  red_prop[which(red_prop <= n)]
}

red_prop <- useless_var(train, 0.995)
# Keep only the columns named in `red_prop`. Plain `[` indexing is used
# instead of subset(): subset() evaluates `select` with the column names in
# scope, so a column called `red_prop` would shadow the variable.
train <- train[, names(red_prop), drop = FALSE]

# The trailing `##` comment looks like console output captured from an
# earlier run; it is not recomputed here.
dim(train)  ## [1] 76020 157