지도학습 문서 분류 - SVM
1. Train DTM 만들기
# Build the training document-term matrix (DTM) and fit the SVM classifier.
# Objects created here and read by later sections: txt_base, keywords,
# train, m.

# Raw training rows; must contain the text and label columns named in the
# makeDTM() call below.
txt_base <- read.any("data/보험_sample_train.csv", header = TRUE)

# Tag the text and collect the terms used to pick the vocabulary.
# NOTE(review): SimplePos22()/words.nc() are not defined in this file;
# presumably a POS tagger and a noun extractor - confirm their source package.
txt <- as.matrix(txt_base)
txt <- SimplePos22(txt)
nounVec <- words.nc(txt)
nounFreq <- table(nounVec)
nounFreq

# The 30 most frequent terms are passed to makeDTM() as the keyword set.
keywords <- names(head(sort(nounFreq, decreasing = TRUE), 30))
keywords

library(makeDTM)
train <- makeDTM(
  txt_base,
  key = keywords,
  LABEL = TRUE,
  TEXT.name = "민원제목",
  LABEL.name = "금융권역"
)

library(e1071)
# Keep the grid-search result: the original discarded it and then called
# summary(t), which summarised the base function t() instead of the tuning.
tuned <- tune.svm(
  factor(LABEL) ~ .,
  data = train,
  gamma = 2^(-1:1),
  cost = 2^(2:4)
)
summary(tuned)

# NOTE(review): gamma/cost are hard-coded; presumably copied from an earlier
# tuning run - compare against tuned$best.parameters.
m <- svm(factor(LABEL) ~ ., data = train, gamma = 0.5, cost = 8)
m
2. Test DTM 만들기
# Build the test document-term matrix (DTM) on the SAME keyword set that the
# training DTM was built with, so the classifier sees matching features.
# Reads `keywords` and `train` from the training section; creates `test`
# and `m_test`.

txt_base_test <- read.any("data/보험_sample_test.csv", header = TRUE)

# Term frequencies of the test set, for inspection only.
# NOTE(review): this still overwrites nounVec/nounFreq from the training
# section, as the original did.
txt_test <- as.matrix(txt_base_test)
txt_test <- SimplePos22(txt_test)
nounVec <- words.nc(txt_test)
nounFreq <- table(nounVec)
nounFreq

# Show the test set's own top-30 terms WITHOUT assigning them to `keywords`.
# The original overwrote `keywords` here, so the test DTM was built on a
# different vocabulary from the one the model was trained on.
names(head(sort(nounFreq, decreasing = TRUE), 30))
keywords

library(makeDTM)
test <- makeDTM(
  txt_base_test,
  key = keywords,
  LABEL = TRUE,
  TEXT.name = "민원제목",
  LABEL.name = "금융권역"
)

library(e1071)
# NOTE(review): this grid search runs on the held-out test set, which leaks
# test information into hyper-parameter choice. Kept as in the original
# (result is only printed); consider removing it.
tune.svm(
  factor(LABEL) ~ .,
  data = test,
  gamma = 2^(-1:1),
  cost = 2^(2:4)
)

# NOTE(review): m_test is fitted on `train`, not `test`, and is not the model
# used for prediction later in the script - confirm it is still needed.
m_test <- svm(factor(LABEL) ~ ., data = train, gamma = 2, cost = 4)
m_test
3. 예측 및 정확도 확인
# Predict the test labels with the trained model `m` and report accuracy.
test.predict <- predict(m, test)
test.predict

# Cross-tabulate actual vs predicted on a shared level set so the matrix is
# always square with matching row/column order, whatever the class count.
# Named conf_mat (not `table`) to avoid shadowing base::table().
class_levels <- union(
  levels(factor(test.predict)),
  levels(factor(test$LABEL))
)
conf_mat <- table(
  real = factor(test$LABEL, levels = class_levels),
  predict = factor(test.predict, levels = class_levels)
)
conf_mat

# Accuracy = correctly classified (diagonal) / all classified documents.
# The original summed [1,1], [2,2], [3,3] and so only worked for 3 classes.
sum(diag(conf_mat)) / sum(conf_mat)