지도학습 문서 분류 - SVM

1. Train DTM 만들기

# Build the training document-term matrix (DTM) and fit an SVM classifier.

# Load the labelled training complaints (first row is the header).
txt_base <- read.any("data/보험_sample_train.csv", header = TRUE)

# Tag the raw text and collect the extracted terms.
# NOTE(review): SimplePos22() and words.nc() are not defined in this file;
# presumably POS tagging followed by noun extraction -- confirm.
txt <- as.matrix(txt_base)
txt <- SimplePos22(txt)
nounVec <- words.nc(txt)

# Term frequencies over the training text, printed for inspection.
nounFreq <- table(nounVec)
nounFreq

# The 30 most frequent terms are used as the DTM keyword set.
keywords <- names(head(sort(nounFreq, decreasing = TRUE), 30))
keywords

library(makeDTM)
train <- makeDTM(
  txt_base,
  key = keywords,
  LABEL = TRUE,
  TEXT.name = "민원제목",
  LABEL.name = "금융권역"
)

library(e1071)
# Grid search over gamma and cost. The result has to be stored: the previous
# code discarded it and then called summary(t), which summarises base R's
# transpose function `t` rather than the tuning result.
tuned <- tune.svm(
  factor(LABEL) ~ .,
  data = train,
  gamma = 2^(-1:1),
  cost = 2^(2:4)
)
tuned
summary(tuned)

# Final model with fixed hyper-parameters.
# NOTE(review): gamma/cost are hard-coded; compare them with
# tuned$best.parameters, since the tuning outcome may vary between runs.
m <- svm(factor(LABEL) ~ ., data = train, gamma = 0.5, cost = 8)
m

2. Test DTM 만들기

# Build the test document-term matrix (DTM) on the SAME features as training.

# Load the labelled test complaints (first row is the header).
txt_base_test <- read.any("data/보험_sample_test.csv", header = TRUE)

# Tag the raw text and collect the extracted terms (inspection only).
# NOTE(review): SimplePos22() and words.nc() are not defined in this file;
# presumably POS tagging followed by noun extraction -- confirm.
txt_test <- as.matrix(txt_base_test)
txt_test <- SimplePos22(txt_test)
nounVec <- words.nc(txt_test)

# Term frequencies over the test text, printed for inspection.
nounFreq <- table(nounVec)
nounFreq

# Top-30 test terms are shown for comparison only. They are deliberately NOT
# assigned to `keywords`: the model `m` was trained on columns derived from
# the training keywords, so the test DTM must be built from that same set.
# Overwriting `keywords` here made the test columns differ from the features
# the model was trained on.
keywords_test <- names(head(sort(nounFreq, decreasing = TRUE), 30))
keywords_test

library(makeDTM)
# `keywords` is the training keyword set defined in step 1.
test <- makeDTM(
  txt_base_test,
  key = keywords,
  LABEL = TRUE,
  TEXT.name = "민원제목",
  LABEL.name = "금융권역"
)

library(e1071)
# NOTE(review): tuning hyper-parameters on the test set leaks test
# information into model selection; kept only so existing output and
# `m_test` stay available. Do not use it to choose the final model.
tune.svm(
  factor(LABEL) ~ .,
  data = test,
  gamma = 2^(-1:1),
  cost = 2^(2:4)
)
# Alternative model, still fitted on the training data.
m_test <- svm(factor(LABEL) ~ ., data = train, gamma = 2, cost = 4)
m_test

3. 예측 및 정확도 확인

# Predict the test labels with the trained SVM and report the accuracy.
test.predict <- predict(m, test)
test.predict

# Put actual and predicted labels on one shared level set so the confusion
# matrix is always square with matching row/column order, even when a class
# is missing from either side.
real_labels <- as.character(test$LABEL)
pred_labels <- as.character(test.predict)
label_levels <- sort(union(real_labels, pred_labels))

conf_mat <- table(
  real = factor(real_labels, levels = label_levels),
  predict = factor(pred_labels, levels = label_levels)
)
conf_mat

# Alias under the original variable name, kept for any later code that still
# refers to `table`; calls to table(...) continue to resolve to base::table.
table <- conf_mat

# Accuracy = correctly classified / total. Using diag() works for any number
# of classes instead of assuming exactly three.
sum(diag(conf_mat)) / sum(conf_mat)