# ---- Data loading and train split ----
# Read the full data set and keep a random 70% of its rows for training.
# The draw is not seeded here, so the selected rows differ between runs.
data_all <- read.csv("train.csv", header = TRUE)
train_size <- floor(0.7 * nrow(data_all))
train_indx <- sample(seq_len(nrow(data_all)), size = train_size)
data <- data_all[train_indx, ]

# Feature matrix: every column except the first, coerced to a numeric matrix.
# NOTE(review): assumes the first column of train.csv is `label` -- confirm.
train <- data[, -1]
train <- data.matrix(train)

# Number of output classes; sizes the output layer and is the code that
# label 0 is remapped to below.
num_labels <- 10

# Targets: label 0 is recoded as `num_labels` (10).
# NOTE(review): presumably so classes can be used as 1-based indices -- confirm
# against NN_cost.
y <- data$label
y <- replace(y, y == 0, num_labels)

# Shift and scale the features.
# NOTE(review): constants 125 and 255 suggest 0-255 pixel intensities -- confirm.
train_nl <- (train - 125) / 255

# ---- Random weight initialisation ----
# Hidden-layer width. Every weight-matrix dimension below is derived from
# `h_layer` and `num_labels` so the shapes always agree with the epsilons.
h_layer <- 25

# Half-width of the uniform initialisation range for each layer:
# sqrt(6) / sqrt(fan_in + fan_out).
t1_epsilon <- sqrt(6) / sqrt(ncol(train) + h_layer)
t2_epsilon <- sqrt(6) / sqrt(h_layer + num_labels)

# Draw uniform(0, 1) values; each matrix has one more column than its input
# width (presumably the bias term -- confirm against NN_cost).
t1_random <- matrix(
  runif((ncol(train) + 1) * h_layer, 0, 1),
  h_layer,
  ncol(train) + 1
)
t2_random <- matrix(
  runif((h_layer + 1) * num_labels, 0, 1),
  num_labels,
  h_layer + 1
)

# Rescale from [0, 1] to [-epsilon, epsilon].
t1_random <- t1_random * 2 * t1_epsilon - t1_epsilon
t2_random <- t2_random * 2 * t2_epsilon - t2_epsilon

# Initial parameter vector: both weight matrices unrolled column-wise,
# layer 1 first, then layer 2.
ini <- c(as.vector(t1_random), as.vector(t2_random))

# Value forwarded as the `lambda` argument of NN_cost by costFunction
# (presumably the regularisation strength -- confirm against NN_cost).
lambda <- 1
# Objective wrapper handed to the optimiser.
#
# Evaluates NN_cost on the global `train_nl`, `y` and `lambda` for the
# parameter vector `p`, and returns a list with:
#   J    - the `cost` element of the NN_cost result
#   grad - the `grad$t1` and `grad$t2` elements of the NN_cost result,
#          unrolled and concatenated (t1 first) into a one-column matrix
costFunction <- function(p) {
  nn_out <- NN_cost(train_nl, y, p, lambda = lambda)
  unrolled <- c(as.vector(nn_out$grad$t1), as.vector(nn_out$grad$t2))
  # as.matrix() on a plain vector already yields a length(unrolled) x 1 matrix.
  list(J = nn_out$cost, grad = as.matrix(unrolled))
}
# Minimise costFunction starting from the randomly initialised weight vector
# `ini`, with Maxiter = 500. The original passed `par`, which is base R's
# graphics-parameter function rather than a parameter vector.
optimization <- fmincg(f = costFunction, X = ini, Maxiter = 500)