##================================================================
## 01. Environment setup and data loading
##================================================================
library(nnet)
# Load the raw data set from the working directory. Note that the name
# `letters` masks base R's built-in `letters` constant for the rest of the
# script.
letters <- read.csv(file = "letterdata.csv", header = TRUE)

#' Min-max rescale a numeric vector to the [0, 1] interval.
#'
#' @param x Numeric vector to rescale.
#' @return A numeric vector the same length as `x`, computed as
#'   `(x - min(x)) / (max(x) - min(x))`. A constant vector (zero range) maps
#'   to all zeros instead of `NaN`. `NA` values are not removed, so any `NA`
#'   in `x` makes the whole result `NA`.
normalize <- function(x) {
  lo <- min(x)
  span <- max(x) - lo
  # A constant column has zero range and would turn into 0 / 0 = NaN for
  # every element; divide by 1 instead so it becomes a column of zeros.
  # isTRUE() keeps NA / NaN ranges on the original (propagating) path.
  if (isTRUE(span == 0)) {
    span <- 1
  }
  (x - lo) / span
}

# Rescale the predictor columns (2 through 17 of the raw data) with
# normalize(), then add the class label back as a `letter` column.
letters_norm <- as.data.frame(lapply(letters[, 2:17], normalize))
letters_norm[["letter"]] <- letters$letter

# The nnet call further down takes its response as a matrix, so expand the
# label with class.ind() into indicator columns and append them.
letters.ind <- class.ind(letters_norm[["letter"]])
letters_norm <- cbind(letters_norm, letters.ind)

##================================================================
## 02. Train / test split
##================================================================
set.seed(123)
# Draw the 10,000 training rows from however many rows the data actually has
# rather than assuming exactly 20,000: a hard-coded 1:20000 silently yields
# all-NA rows when the file is shorter. The remaining rows form the test set.
train.idx <- sample(seq_len(nrow(letters_norm)), 10000)
letters.train.df <- letters_norm[train.idx, ]
letters.test.df <- letters_norm[-train.idx, ]

##================================================================
## 03. Fit the neural network
##================================================================
# letters.nn <- nnet(x = letters.train.df[,c(2:17)], y=letters.train.df[,c(18:43)], size = 10, softmax = T)
# Columns 1-16 hold the scaled predictors and columns 18-43 the class
# indicator columns (column 17 is the raw `letter` label and is skipped).
# The recorded run below stopped after 100 iterations, so pass a larger
# `maxit` if a longer fit is wanted.
letters.nn <- nnet(
  x = letters.train.df[, 1:16],
  y = letters.train.df[, 18:43],
  size = 12,
  softmax = TRUE  # spelled out: `T` is an ordinary variable and can be reassigned
)
## # weights:  542
## initial  value 34737.159702 
## iter  10 value 29354.350686
## iter  20 value 21609.044510
## iter  30 value 19612.457589
## iter  40 value 17099.598859
## iter  50 value 14249.063867
## iter  60 value 12396.858490
## iter  70 value 11252.963729
## iter  80 value 10388.243165
## iter  90 value 9735.159284
## iter 100 value 9049.466788
## final  value 9049.466788 
## stopped after 100 iterations
##================================================================
## 04. Predict and evaluate the model
##================================================================
# Training data
# letters.pred <- predict(letters.nn, letters.train.df[,c(2:17)], type = "class")
letters.pred <- predict(letters.nn, letters.train.df[, 1:16], type = "class")
# Confusion matrix (predicted in rows, actual in columns), kept for inspection.
a <- table(letters.pred, letters.train.df$letter)
# Accuracy is the share of exact matches. Comparing the two vectors directly
# stays correct even when some letter is never predicted; in that case `a` is
# not square and its diagonal no longer pairs each class with itself, so
# sum(diag(a)) / sum(a) would report the wrong figure.
mean(letters.pred == letters.train.df$letter)
## [1] 0.744
# Held-out (test) data
letters.pred <- predict(letters.nn, letters.test.df[, 1:16], type = "class")
a <- table(letters.pred, letters.test.df$letter)
mean(letters.pred == letters.test.df$letter)
## [1] 0.7272
# 
# ##================================================================
# ## 05. Neural network visualization
# ##================================================================
# # Download the R function used for visualization
# library(devtools)
# source_url('https://gist.githubusercontent.com/fawda123/7471137/raw/466c1474d0a505ff044412703516c34f1a4684a5/nnet_plot_update.r')
# 
# # Visualize the neural network model
# library(reshape2)
# plot.nnet(iris.nn)
# plot.nnet(letters.nn)