#3/26 学習データ数を254、テストデータ数を28に変更 参考資料

https://qiita.com/nkjm/items/e751e49c7d2c619cbeab

https://momonoki2017.blogspot.com/2018/04/r007-riris.html

https://rpubs.com/fumi/582119

http://d-m-l.jp/Rbiz/task_rf.html

https://funatsu-lab.github.io/open-course-ware/machine-learning/random-forest/

http://takenaka-akio.org/doc/r_auto/chapter_03.html

http://yut.hatenablog.com/entry/20120827/1346024147

https://mjin.doshisha.ac.jp/R/Chap_23/23.html

https://qiita.com/TsutomuNakamura/items/a1a6a02cb9bb0dcbb37f 混同行列(Confusion Matrix) とは

——–ランダムフォレスト概要

http://d-m-l.jp/Rbiz/task_rf.html ランダムフォレストとは機械学習のアルゴリズムの1つで、学習用のデータをランダムにサンプリングして多数の決定木を作成し、作成した決定木をもとに多数決で結果を決める方法です。精度、汎用性が高く扱いやすい分析手法です。

ランダムフォレストの特徴

————————————————————————-

ランダムフォレストでデータを分析するアルゴリズム

#ランダムフォレストで使用するデータ - Titanics.rpart - Titanic - Titanichはtraingが統計処理されたデータでありこの演習には不向き - cordataは、グラフィック用に処理されたデータでありtrainのPclasswを3区分したり、sexを2区分するなど一部質的化したが、Fareh・年齢は量的データのままであり、氏名はそのままであり、欠落のあるデータは補完してある。 - ダミー変数ummy_varn等はカテゴリーデータをintegerデータに置き換えたものであり以下の論点に合わないらしいので使わない - lldataを使っても良いが、(makedummies()を使用してダミー変数)を実施する前のdumとnot_dum結合した、 - train2を使用する

#randomForestではCharacterは使わないようにしよう http://ushi-goroshi.hatenablog.com/entry/2019/01/30/171259

library(car)
## Loading required package: carData
library(caret)
## Warning: package 'caret' was built under R version 3.6.2
## Loading required package: lattice
library(cluster)
library(dummies)
## dummies-1.5.6 provided by Decision Patterns
library(data.table)
## Warning: package 'data.table' was built under R version 3.6.2
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
## 
##     between, first, last
## The following object is masked from 'package:car':
## 
##     recode
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(e1071)
## Warning: package 'e1071' was built under R version 3.6.2
library(epitools)
library(effects)
## Warning: package 'effects' was built under R version 3.6.3
## Registered S3 methods overwritten by 'lme4':
##   method                          from
##   cooks.distance.influence.merMod car 
##   influence.merMod                car 
##   dfbeta.influence.merMod         car 
##   dfbetas.influence.merMod        car
## Use the command
##     lattice::trellis.par.set(effectsTheme())
##   to customize lattice options for effects plots.
## See ?effectsTheme for details.
library(ggplot2)
library(ggthemes)
## Warning: package 'ggthemes' was built under R version 3.6.2
library(randomForest)
## Warning: package 'randomForest' was built under R version 3.6.3
## randomForest 4.6-14
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## The following object is masked from 'package:dplyr':
## 
##     combine
## The following object is masked from 'package:ggplot2':
## 
##     margin
library(ranger)
## Warning: package 'ranger' was built under R version 3.6.2
## 
## Attaching package: 'ranger'
## The following object is masked from 'package:randomForest':
## 
##     importance
library(rgl)
library(rattle)
## Warning: package 'rattle' was built under R version 3.6.2
## Rattle: A free graphical interface for data science with R.
## バージョン 5.3.0 Copyright (c) 2006-2018 Togaware Pty Ltd.
## 'rattle()' と入力して、データを多角的に分析します。
## 
## Attaching package: 'rattle'
## The following object is masked from 'package:ranger':
## 
##     importance
## The following object is masked from 'package:randomForest':
## 
##     importance
library(readr)
## Warning: package 'readr' was built under R version 3.6.2
library(rpart.plot)
## Warning: package 'rpart.plot' was built under R version 3.6.2
## Loading required package: rpart
library(rpart)
library(readr)
library(reshape)
## Warning: package 'reshape' was built under R version 3.6.2
## 
## Attaching package: 'reshape'
## The following object is masked from 'package:dplyr':
## 
##     rename
## The following object is masked from 'package:data.table':
## 
##     melt
library(rsconnect)
## Warning: package 'rsconnect' was built under R version 3.6.2
library(reshape2)
## 
## Attaching package: 'reshape2'
## The following objects are masked from 'package:reshape':
## 
##     colsplit, melt, recast
## The following objects are masked from 'package:data.table':
## 
##     dcast, melt
library(tidyr)
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:reshape2':
## 
##     smiths
## The following objects are masked from 'package:reshape':
## 
##     expand, smiths
library(xtable)
library(nnet)
## Warning: package 'nnet' was built under R version 3.6.2
library(stargazer)
## 
## Please cite as:
##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(randomForest)
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.6.2
## -- Attaching packages ------------------------------------------------------------------------------ tidyverse 1.3.0 --
## √ tibble  2.1.3     √ stringr 1.4.0
## √ purrr   0.3.3     √ forcats 0.4.0
## Warning: package 'stringr' was built under R version 3.6.2
## Warning: package 'forcats' was built under R version 3.6.2
## -- Conflicts --------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::between()        masks data.table::between()
## x randomForest::combine() masks dplyr::combine()
## x tidyr::expand()         masks reshape::expand()
## x dplyr::filter()         masks stats::filter()
## x dplyr::first()          masks data.table::first()
## x dplyr::lag()            masks stats::lag()
## x dplyr::last()           masks data.table::last()
## x purrr::lift()           masks caret::lift()
## x randomForest::margin()  masks ggplot2::margin()
## x dplyr::recode()         masks car::recode()
## x reshape::rename()       masks dplyr::rename()
## x purrr::some()           masks car::some()
## x purrr::transpose()      masks data.table::transpose()

下水道データ読み込み# 基本統計量表示 gesui # 教科書ではlogit

# Read the sewer data set (osui.csv) from the working directory.
gesui <- read_csv("osui.csv")
## Parsed with column specification:
## cols(
##   OBJECTID = col_double(),
##   sys_name = col_double(),
##   slope = col_double(),
##   uedokaburi = col_double(),
##   masuhonsuu = col_double(),
##   long = col_double(),
##   kubun = col_double(),
##   did = col_double(),
##   kouhou = col_double(),
##   nendo = col_double(),
##   ekijyouka = col_double(),
##   kyouyounensuu = col_double(),
##   kansyu = col_double(),
##   kei = col_double(),
##   kinkyuudo = col_double(),
##   taisyo = col_double()
## )
# Convert the object returned by read_csv() into a plain data.frame
# (the textbook calls this object `logit`).
gesui <- data.frame(gesui)
# Drop the first two columns (OBJECTID and sys_name in the column
# specification printed by read_csv()).
gesui <- gesui[-c(1, 2)]
# Render the summary-statistics table as HTML.
stargazer(gesui, type = "html")
Statistic N Mean St. Dev. Min Pctl(25) Pctl(75) Max
slope 1,423 3.437 2.323 -6 1.8 4.5 10
uedokaburi 1,423 4.371 2.475 0.360 2.727 4.949 13.863
masuhonsuu 1,423 1.338 1.808 0 0 2 13
long 1,423 35.129 18.972 0.970 21.445 46.510 196.280
kubun 1,423 1.204 0.403 1 1 1 2
did 1,423 0.696 0.460 0 0 1 1
kouhou 1,423 0.415 0.493 0 0 1 1
nendo 1,423 1,982.967 5.973 1,974 1,978 1,990 2,006
ekijyouka 1,423 0.396 0.611 0 0 1 4
kyouyounensuu 1,423 33.033 5.973 10 26 38 42
kansyu 1,423 1.198 0.399 1 1 1 2
kei 1,423 517.182 308.765 200 250 800 1,650
kinkyuudo 1,423 1.297 1.457 0 0 3 3
taisyo 1,423 0.448 0.497 0 0 1 1
# Convert the numerically coded categorical columns to factors.
categorical_cols <- c("kansyu", "taisyo", "kubun", "did", "ekijyouka")
gesui[categorical_cols] <- lapply(gesui[categorical_cols], as.factor)
# Render the summary table again; the factor columns no longer appear in it.
stargazer(gesui, type = "html")
Statistic N Mean St. Dev. Min Pctl(25) Pctl(75) Max
slope 1,423 3.437 2.323 -6 1.8 4.5 10
uedokaburi 1,423 4.371 2.475 0.360 2.727 4.949 13.863
masuhonsuu 1,423 1.338 1.808 0 0 2 13
long 1,423 35.129 18.972 0.970 21.445 46.510 196.280
kouhou 1,423 0.415 0.493 0 0 1 1
nendo 1,423 1,982.967 5.973 1,974 1,978 1,990 2,006
kyouyounensuu 1,423 33.033 5.973 10 26 38 42
kei 1,423 517.182 308.765 200 250 800 1,650
kinkyuudo 1,423 1.297 1.457 0 0 3 3
# Drop columns that must not be used as predictors, selecting them by name.
# Names that are not present in the data frame are simply ignored by %in%.
exclude_cols <- c("OBJECTID", "kinkyuudo")
gesui <- gesui[!names(gesui) %in% exclude_cols]

# Hold out 142 rows as the test set, drawing from every row of the data.
# The earlier sample(1281, 142) only drew indices from 1..1281, so any row
# beyond 1281 could never be selected for the test set.
set.seed(123)  # make the train/test split reproducible
randomgesui <- sample(nrow(gesui), 142)
test <- gesui[randomgesui, ]
train <- gesui[-randomgesui, ]
# The code that follows fits on `gesui`, so point it at the training rows.
gesui <- train

# Fit a random forest with default settings, using every other column of
# gesui as a predictor of taisyo, then print the fit summary.
model <- randomForest(taisyo ~ ., data = gesui)
print(model)
## 
## Call:
##  randomForest(formula = taisyo ~ ., data = gesui) 
##                Type of random forest: classification
##                      Number of trees: 500
## No. of variables tried at each split: 3
## 
##         OOB estimate of  error rate: 25.84%
## Confusion matrix:
##     0   1 class.error
## 0 560 147   0.2079208
## 1 184 390   0.3205575
# Predict on gesui, the same data frame the model was fitted to, and print
# every predicted class (this is an in-sample prediction).
predition <- predict(model, newdata = gesui)
print(predition)
##    2    3    4    6    7    8    9   10   12   13   14   15   16   17   18   19 
##    1    1    0    0    1    1    1    1    0    1    1    0    0    0    1    1 
##   21   22   23   24   25   26   27   28   29   30   32   33   34   35   36   37 
##    0    1    1    1    1    0    0    1    1    0    1    1    1    0    0    1 
##   38   39   40   41   42   43   45   46   47   48   49   51   52   53   54   55 
##    1    1    1    1    1    1    1    1    1    1    1    1    1    1    1    0 
##   56   57   58   59   60   61   62   63   64   65   66   67   68   69   70   71 
##    0    0    1    0    1    1    1    0    0    0    1    1    1    1    0    0 
##   72   73   74   75   76   77   78   79   80   81   82   83   84   85   86   87 
##    1    0    0    1    1    0    0    1    0    1    1    1    1    1    1    0 
##   88   89   90   91   92   93   94   95   96   97   98   99  100  101  102  103 
##    0    0    0    0    0    0    0    0    0    0    0    1    0    0    0    0 
##  104  105  106  107  108  109  110  111  112  113  115  116  117  118  119  120 
##    1    0    0    0    1    0    0    1    0    1    1    0    0    1    0    1 
##  121  122  123  124  125  126  127  128  129  130  131  132  133  135  136  137 
##    0    0    0    1    0    0    0    0    1    0    0    0    0    1    0    1 
##  138  139  140  141  142  143  144  145  146  147  148  149  150  151  152  154 
##    1    1    1    0    0    0    0    0    0    0    1    0    0    0    0    0 
##  155  156  157  160  162  163  164  165  166  167  168  169  170  172  173  174 
##    0    0    1    0    0    0    0    0    0    0    0    0    0    0    0    0 
##  176  177  178  179  180  181  182  183  184  185  186  187  189  190  191  192 
##    0    0    0    0    0    0    1    1    1    0    1    0    0    0    0    0 
##  193  194  195  196  198  199  200  201  202  203  204  205  206  207  209  210 
##    1    1    1    1    1    1    1    1    1    1    1    1    1    0    0    0 
##  211  212  213  214  215  216  219  220  222  223  224  225  226  227  228  229 
##    1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0 
##  231  232  233  234  235  236  237  239  240  242  243  244  245  246  247  249 
##    1    1    0    1    1    1    1    1    0    1    1    1    1    1    1    1 
##  250  251  252  253  254  255  256  258  259  260  261  262  263  265  266  267 
##    1    1    1    1    1    1    1    1    1    1    1    1    0    1    0    0 
##  268  269  270  271  272  273  274  275  276  277  278  279  280  281  282  283 
##    1    1    1    1    1    0    0    0    1    1    1    1    0    1    1    1 
##  284  285  286  287  288  289  290  291  292  293  295  297  298  299  300  301 
##    0    0    0    0    0    0    0    0    0    0    0    1    1    0    1    0 
##  302  304  305  306  307  308  310  311  312  313  314  315  316  317  318  319 
##    0    1    0    1    0    0    0    0    0    1    0    0    0    0    1    1 
##  320  321  322  323  324  325  326  327  328  329  330  331  332  333  334  335 
##    1    1    1    1    1    1    0    0    0    0    0    0    0    1    1    1 
##  336  338  339  340  341  342  345  346  347  348  349  350  351  352  353  354 
##    1    1    0    1    0    1    1    1    0    1    1    1    0    1    1    1 
##  355  356  359  360  361  362  363  365  366  367  368  369  370  371  372  373 
##    1    0    0    0    0    0    0    1    1    1    1    1    1    1    1    1 
##  374  375  376  377  378  379  380  381  382  383  384  385  386  387  388  389 
##    1    1    1    1    1    1    1    1    1    0    0    0    1    1    1    1 
##  390  392  393  394  395  396  398  399  400  401  402  403  404  405  406  407 
##    1    1    0    1    1    0    1    1    1    1    0    1    1    1    0    1 
##  408  410  411  412  414  415  416  417  418  420  421  422  423  424  425  426 
##    0    0    0    0    0    0    0    0    0    1    0    1    1    0    1    0 
##  429  432  433  434  435  436  437  438  440  441  442  444  445  446  447  448 
##    1    1    1    1    1    1    0    1    1    1    1    1    1    0    1    1 
##  449  450  452  453  454  455  456  457  458  459  460  462  463  464  465  466 
##    0    1    0    1    1    1    1    1    1    1    1    0    0    0    0    1 
##  467  468  469  470  471  472  473  474  475  476  477  478  479  480  481  482 
##    1    0    0    0    1    0    1    1    0    0    0    0    0    1    1    1 
##  483  484  485  486  487  488  489  490  492  493  494  495  496  497  498  499 
##    1    1    1    1    1    1    0    0    1    1    1    1    0    1    1    1 
##  500  501  502  503  504  505  506  507  508  509  510  511  512  513  514  515 
##    0    1    0    1    1    1    0    1    0    1    0    1    1    1    1    0 
##  516  517  518  520  521  522  523  524  525  526  527  528  529  530  531  532 
##    1    1    0    0    0    1    1    1    1    1    1    1    1    0    0    1 
##  533  535  536  537  539  540  541  542  543  544  546  547  548  549  550  551 
##    1    1    1    1    1    1    1    0    1    1    1    0    0    1    0    0 
##  553  554  555  556  557  558  560  561  562  563  564  565  567  568  569  570 
##    0    0    0    1    0    1    0    0    0    0    0    0    1    1    0    0 
##  571  572  573  574  575  577  578  579  580  581  582  583  584  585  586  587 
##    1    0    0    0    0    0    0    1    0    0    0    1    0    1    1    0 
##  588  590  591  592  594  595  596  597  598  599  600  601  602  603  604  605 
##    1    0    0    1    0    1    0    0    0    0    0    0    0    1    0    1 
##  606  607  608  609  610  611  612  613  614  618  619  621  622  623  624  625 
##    1    1    0    0    0    0    1    1    1    1    1    1    1    1    1    1 
##  627  628  629  630  631  632  633  634  635  636  637  638  639  640  641  642 
##    1    1    0    1    0    0    1    0    1    0    0    0    0    1    1    0 
##  643  644  645  646  647  649  650  651  652  653  654  655  656  657  658  660 
##    1    1    0    0    1    0    0    0    1    0    1    1    1    1    1    1 
##  661  662  663  664  665  666  667  668  669  671  672  673  674  675  676  677 
##    1    1    1    0    0    0    0    0    0    0    0    0    0    0    0    0 
##  678  679  680  681  682  683  684  685  686  687  689  690  691  692  693  694 
##    0    0    0    0    0    0    0    0    0    0    0    0    1    0    0    0 
##  695  696  697  698  699  700  701  704  706  707  708  709  711  712  713  715 
##    0    0    0    0    0    0    0    0    0    0    0    0    0    1    0    0 
##  716  717  719  720  721  722  723  724  725  726  727  728  729  730  731  732 
##    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0 
##  733  734  735  736  737  738  739  740  741  743  744  745  746  747  748  749 
##    0    1    0    0    0    0    0    1    1    1    0    0    0    1    0    0 
##  750  751  752  753  754  755  756  757  758  759  760  761  762  763  764  765 
##    0    0    0    0    0    1    1    0    0    1    0    0    0    0    0    0 
##  766  768  769  770  771  772  773  776  777  778  780  781  782  783  784  785 
##    0    0    0    0    0    1    0    0    1    0    0    0    0    0    1    0 
##  786  787  788  790  793  794  795  796  797  798  799  800  801  802  803  804 
##    1    0    0    0    0    0    0    1    1    1    0    1    1    1    1    1 
##  805  806  807  808  809  810  811  812  813  814  815  816  817  818  819  820 
##    1    1    1    1    1    1    1    1    1    1    1    1    1    1    0    1 
##  821  822  823  824  825  826  828  829  831  832  833  834  835  836  837  838 
##    0    0    0    1    0    0    0    1    0    1    0    0    0    0    0    0 
##  839  840  841  842  843  844  845  846  847  848  849  850  851  852  853  854 
##    0    0    0    0    0    0    0    0    0    1    0    0    0    1    1    1 
##  855  856  857  858  859  860  861  862  863  865  866  867  868  869  870  871 
##    0    1    1    0    0    0    0    0    0    0    0    0    0    1    1    0 
##  873  874  876  877  878  879  881  882  883  884  885  886  887  888  889  890 
##    0    1    1    1    0    0    0    0    1    0    1    1    0    1    1    1 
##  891  892  893  894  895  896  897  898  901  902  903  904  905  906  907  908 
##    1    1    1    1    0    0    0    1    1    1    1    1    0    0    1    1 
##  909  910  911  912  913  914  915  916  917  918  919  920  921  923  924  925 
##    1    1    1    1    1    1    1    1    1    1    1    0    1    1    0    1 
##  926  927  930  931  932  933  934  935  937  938  939  940  941  942  943  944 
##    1    0    1    1    0    1    1    1    0    0    1    1    1    0    1    0 
##  945  946  947  949  950  951  952  953  955  956  957  959  960  961  962  964 
##    1    0    0    1    1    0    0    0    0    0    0    0    0    0    0    1 
##  966  967  968  969  970  971  972  973  974  975  976  977  978  980  981  983 
##    1    1    0    1    0    0    0    0    0    0    1    0    0    0    0    0 
##  984  985  987  988  989  990  992  993  994  995  996  997  998  999 1000 1001 
##    1    0    0    1    1    1    1    1    1    1    1    1    1    0    1    1 
## 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1016 1017 1018 
##    1    0    1    0    0    0    1    1    1    1    1    1    1    1    1    0 
## 1019 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 
##    0    0    1    1    0    0    0    0    1    0    1    0    0    0    0    0 
## 1036 1038 1040 1041 1042 1043 1044 1046 1047 1048 1049 1050 1052 1053 1054 1055 
##    0    0    0    0    0    0    0    0    0    0    1    0    0    0    0    0 
## 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1069 1070 1071 1072 1073 
##    0    0    0    0    1    0    0    1    1    0    1    0    1    1    1    1 
## 1074 1075 1076 1078 1079 1082 1083 1085 1086 1087 1088 1089 1090 1091 1092 1093 
##    1    1    1    1    1    1    1    1    1    1    1    1    1    1    0    0 
## 1094 1096 1098 1099 1100 1101 1102 1103 1104 1105 1106 1108 1109 1110 1112 1113 
##    1    1    1    1    1    1    1    1    1    1    1    0    1    0    0    0 
## 1114 1115 1116 1117 1118 1119 1120 1122 1123 1124 1125 1126 1127 1128 1129 1130 
##    1    1    1    1    1    1    0    1    1    0    0    1    1    1    1    1 
## 1131 1132 1133 1135 1136 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 
##    1    1    0    1    0    0    1    1    1    0    0    0    0    0    0    1 
## 1149 1151 1152 1154 1155 1156 1157 1158 1159 1160 1162 1163 1164 1165 1167 1168 
##    1    0    1    0    1    0    0    1    0    0    0    0    0    0    1    0 
## 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1181 1183 1184 1186 1187 
##    0    0    0    0    0    0    0    1    0    0    0    0    0    1    0    0 
## 1188 1189 1190 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 
##    1    1    1    0    0    1    0    1    0    0    0    1    0    1    1    1 
## 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1216 1217 1218 1219 1221 1222 
##    1    1    1    0    0    0    1    1    1    1    0    0    0    1    0    0 
## 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1236 1237 1238 1239 
##    0    0    0    1    1    0    0    0    0    0    0    0    0    0    0    0 
## 1240 1241 1243 1244 1245 1246 1247 1248 1250 1251 1252 1253 1254 1255 1256 1257 
##    0    0    0    0    0    0    1    1    1    1    1    1    1    1    1    1 
## 1258 1259 1260 1263 1265 1266 1267 1268 1270 1271 1272 1273 1274 1275 1276 1277 
##    1    1    1    1    1    1    1    1    1    1    1    1    1    1    1    1 
## 1278 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 
##    0    0    1    1    0    0    0    0    0    0    0    1    0    1    0    0 
## 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 
##    0    1    1    1    1    0    0    0    0    0    0    0    0    0    0    0 
## 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 
##    0    0    0    0    0    1    1    0    0    0    0    0    0    0    0    0 
## 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 
##    0    1    0    0    0    0    0    0    0    0    0    0    0    0    0    0 
## 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 
##    0    1    0    0    0    0    0    0    0    0    1    0    0    0    0    0 
## 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 
##    0    0    0    1    1    0    0    0    0    1    0    0    0    0    0    0 
## 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 
##    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0 
## 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 
##    0    0    0    1    0    0    0    0    1    0    1    1    0    0    0    0 
## 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 
##    0    0    0    0    0    0    0    0    0    0    1    0    0    0    0    0 
## 1423 
##    0 
## Levels: 0 1
# List the column names of gesui.
names(gesui)
##  [1] "slope"         "uedokaburi"    "masuhonsuu"    "long"         
##  [5] "kubun"         "did"           "kouhou"        "nendo"        
##  [9] "ekijyouka"     "kyouyounensuu" "kansyu"        "kei"          
## [13] "taisyo"

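#参考資料ではimportance(model)で変数の重み(重要度)が算出されることになっているが、実際にはmodel$importanceでないと算出できない(rangerとrattleがrandomForestのimportance()をマスクしているためと思われる。randomForest::importance(model)のように名前空間を明示する方法もある)。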

# Show the variable-importance matrix stored on the fitted forest.
model[["importance"]]
##               MeanDecreaseGini
## slope                 96.03669
## uedokaburi           110.54773
## masuhonsuu            39.34200
## long                 104.98965
## kubun                 10.28200
## did                   15.27289
## kouhou                12.58609
## nendo                 52.80419
## ekijyouka             23.82998
## kyouyounensuu         51.39913
## kansyu                19.07822
## kei                   51.43232
# Plot the variable importance of the fitted forest.
varImpPlot(model)

ランダムフォレストチューニング(データ検証) http://d-m-l.jp/Rbiz/task_rf.html

http://sfchaos.hatenablog.com/entry/20150628/p1

#注1:set.seed(123)で乱数の種を固定する(結果を再現できるようにするため) https://qiita.com/aich_08_/items/6d885c91c9d461514018

まずは単純にtuneRF関数を実行してみる まずは特別な設定を行わずにtuneRF関数を実行してみよう.tuneRF関数の第1引数には説明変数,第2引数には目的変数を指定する.また,doBest引数をTRUEに指定すると,評価が最も良いモデルを返すようになる.

# Check the size, the column classes and the first rows of gesui.
dim(gesui)
## [1] 1281   13
sapply(gesui, class)
##         slope    uedokaburi    masuhonsuu          long         kubun 
##     "numeric"     "numeric"     "numeric"     "numeric"      "factor" 
##           did        kouhou         nendo     ekijyouka kyouyounensuu 
##      "factor"     "numeric"     "numeric"      "factor"     "numeric" 
##        kansyu           kei        taisyo 
##      "factor"     "numeric"      "factor"
head(gesui)
##   slope uedokaburi masuhonsuu  long kubun did kouhou nendo ekijyouka
## 2  3.90   3.763000          0 15.40     2   1      0  1976         1
## 3  1.32   3.538794          1 14.85     2   1      0  1976         1
## 4  1.22   1.054575          1  3.39     2   1      0  2004         1
## 6  3.50   4.122386          0  9.75     1   1      1  1976         1
## 7  6.80   3.990999          0  2.68     1   1      1  1976         1
## 8  5.00   2.331392          0 12.32     1   1      0  1982         1
##   kyouyounensuu kansyu  kei taisyo
## 2            40      1  800      1
## 3            40      1  250      1
## 4            12      2  200      0
## 6            40      1 1100      0
## 7            40      1 1100      1
## 8            34      1 1100      1
# Fix the RNG seed (note 1) before searching for mtry.
set.seed(123)
gesui.tune <- tuneRF(
  x = dplyr::select(gesui, -taisyo),  # predictors: every column but taisyo
  y = gesui$taisyo,                   # response
  doBest = TRUE                       # return the forest fitted with the best mtry
)
## mtry = 3  OOB error = 27.79% 
## Searching left ...
## mtry = 2     OOB error = 28.65% 
## -0.03089888 0.05 
## Searching right ...
## mtry = 6     OOB error = 27.17% 
## 0.02247191 0.05

この結果,特徴量の個数(mtry)が

3個のときに,Out-of-Bag誤差(OOB error)は27.79%、 2個のときに,Out-of-Bag誤差は28.65%、 6個のときに,Out-of-Bag誤差は27.17%、

となり,今回試した中では特徴量の個数が6個のときにOut-of-Bag誤差が最小となり, この個数に設定するのが良さそうであることがわかる*1

構築する決定木の個数を増やしてみる ntreeTry引数はデフォルトでは50となっており,50個の決定木を構築することがわかる.500個の決定木を構築するように指定してみよう.

# Repeat the mtry search with 500 trees grown for each candidate mtry.
set.seed(123)  # note 1: fix the RNG seed
gesui.tune <- tuneRF(
  x = dplyr::select(gesui, -taisyo),  # predictors: every column but taisyo
  y = gesui$taisyo,                   # response
  ntreeTry = 500,                     # number of trees per tuning step
  trace = TRUE,                       # print the OOB error of each step
  doBest = TRUE                       # return the forest fitted with the best mtry
)
## mtry = 3  OOB error = 26.39% 
## Searching left ...
## mtry = 2     OOB error = 25.6% 
## 0.0295858 0.05 
## Searching right ...
## mtry = 6     OOB error = 26.15% 
## 0.00887574 0.05

決定木を500個にすると,特徴量の個数が2個のときにOut-of-Bag誤差(OOB error)が25.6%で最小となった(3個のときは26.39%,6個のときは26.15%).

チューニングで求めたmtry(tuneRF()の結果オブジェクトの$mtryに入っています)は、randomForest()関数の引数mtryに渡します。

# Build the classifier for taisyo with the mtry selected by tuneRF().
gesui.rf <- randomForest(
  taisyo ~ .,              # model formula: taisyo against all other columns
  data = gesui,            # data used for fitting
  mtry = gesui.tune$mtry   # number of variables tried at each split
)
gesui.rf
## 
## Call:
##  randomForest(formula = taisyo ~ ., data = gesui, mtry = gesui.tune$mtry) 
##                Type of random forest: classification
##                      Number of trees: 500
## No. of variables tried at each split: 2
## 
##         OOB estimate of  error rate: 25.92%
## Confusion matrix:
##     0   1 class.error
## 0 559 148   0.2093352
## 1 184 390   0.3205575
# Keep the importance matrix of the tuned forest in x.
x <- gesui.rf$importance

出力結果の読み方 OOB estimate of error rate:誤判別率(OOBデータによる推定値) Confusion matrix:行が実際のクラス、列が予測されたクラス。上の例では、実際に“0”(緊急度3以下)だった707個のうち、“0”と予測されたものが559個、“1”と予測されたものが148個(class.error = 148/707 ≒ 0.209)と読み取れます。

#重要度順のグラフを出力

# Turn the importance matrix x into a ranking table, most important first.
rank <- data.frame(x)                    # importance matrix -> data frame
rank$factor <- row.names(rank)           # keep the variable names as a column
importance_order <- order(rank[[1]], decreasing = TRUE)
rank <- rank[importance_order, ]         # sort by the first (importance) column
row.names(rank) <- seq_len(nrow(rank))   # row names become the rank position
rank
##    MeanDecreaseGini        factor
## 1         83.128375    uedokaburi
## 2         80.471561          long
## 3         75.992068         slope
## 4         49.962255         nendo
## 5         48.638487 kyouyounensuu
## 6         45.147943           kei
## 7         32.159994    masuhonsuu
## 8         22.306871     ekijyouka
## 9         16.112339        kansyu
## 10        14.786870           did
## 11        11.065621        kouhou
## 12         8.724148         kubun

重要度順のグラフを出力

# Variable-importance plot for the tuned forest.
varImpPlot(gesui.rf)

# Plot all columns of gesui, coloured by taisyo: the first factor level is
# drawn in colour 2 and the second in colour 3.
class_colours <- c(2, 3)
plot(gesui, col = class_colours[gesui$taisyo])

パラメータチューニングしたモデルでの予測

# Predict taisyo for the rows of `test` with the tuned forest.
predition <- predict(gesui.rf, newdata = test)

予測値と実測値の対比

# Cross-tabulate the predicted classes (rows) against the observed taisyo
# values of the test set (columns).
table(predition,test$taisyo)
##          
## predition  0  1
##         0 64 16
##         1 15 47

緊急度の判定

-下水道データ読み込み# 基本統計量表示 gesui # 教科書ではlogit

# Read the sewer data set used for the kinkyuudo analysis.
gesui <- read_csv("gesuidou.csv")
## Parsed with column specification:
## cols(
##   OBJECTID = col_double(),
##   slope = col_double(),
##   long = col_double(),
##   uedokaburi = col_double(),
##   sitadokaburi = col_double(),
##   masuhonsuu = col_double(),
##   nendo = col_double(),
##   kei = col_double(),
##   kubun = col_double(),
##   did = col_double(),
##   kouhou = col_double(),
##   ekijyouka = col_double(),
##   kansyu = col_double(),
##   kinkyuudo = col_double(),
##   taisyo = col_double()
## )
# Convert the object returned by read_csv() into a plain data.frame
# (the textbook calls this object `logit`).
gesui <- data.frame(gesui)

# Convert the numerically coded categorical columns to factors.
gesui$kansyu <- as.factor(gesui$kansyu)
gesui$taisyo <- as.factor(gesui$taisyo)
gesui$kubun <- as.factor(gesui$kubun)
gesui$did <- as.factor(gesui$did)
gesui$ekijyouka <- as.factor(gesui$ekijyouka)
# The response must be a factor as well. Left numeric, randomForest fits a
# regression and warns "The response has five or fewer unique values";
# as a factor it fits a classifier and reports an OOB error rate and a
# confusion matrix.
gesui$kinkyuudo <- as.factor(gesui$kinkyuudo)

# Drop identifier columns by name; names that are not present are ignored.
exclude_cols <- c("OBJECTID", "sys_name")
gesui <- gesui[!names(gesui) %in% exclude_cols]

# Search for the best mtry with 500 trees grown per candidate.
set.seed(123)  # note 1: fix the RNG seed
gesui.tune <- tuneRF(
  x = dplyr::select(gesui, -kinkyuudo),  # predictors: every column but kinkyuudo
  y = gesui$kinkyuudo,                   # response (factor -> classification)
  ntreeTry = 500,                        # number of trees per tuning step
  trace = TRUE,                          # print the OOB error of each step
  doBest = TRUE                          # return the forest fitted with the best mtry
)
## Warning in randomForest.default(x, y, mtry = mtryStart, ntree = ntreeTry, :
## The response has five or fewer unique values. Are you sure you want to do
## regression?
## mtry = 4  OOB error = 1.480305 
## Searching left ...
## Warning in randomForest.default(x, y, mtry = mtryCur, ntree = ntreeTry, :
## The response has five or fewer unique values. Are you sure you want to do
## regression?
## mtry = 2     OOB error = 1.486863 
## -0.004430167 0.05 
## Searching right ...
## Warning in randomForest.default(x, y, mtry = mtryCur, ntree = ntreeTry, :
## The response has five or fewer unique values. Are you sure you want to do
## regression?
## mtry = 8     OOB error = 1.519686 
## -0.02660376 0.05
## Warning in randomForest.default(x, y, mtry = res[which.min(res[, 2]), 1], :
## The response has five or fewer unique values. Are you sure you want to do
## regression?

mtry = 4 のときにOOB誤差(ここでは回帰として実行されているため平均二乗誤差)が1.480で最小となった(mtry = 2 では1.487,mtry = 8 では1.520).

チューニングで求めたmtry(tuneRF()の結果オブジェクトの$mtryに入っています)は、randomForest()関数の引数mtryに渡します。

# Fit the forest for kinkyuudo with the mtry selected by tuneRF().
gesui.rf <- randomForest(
  kinkyuudo ~ .,           # model formula: kinkyuudo against all other columns
  data = gesui,            # data used for fitting
  mtry = gesui.tune$mtry   # number of variables tried at each split
)
## Warning in randomForest.default(m, y, ...): The response has five or fewer
## unique values. Are you sure you want to do regression?
gesui.rf
## 
## Call:
##  randomForest(formula = kinkyuudo ~ ., data = gesui, mtry = gesui.tune$mtry) 
##                Type of random forest: regression
##                      Number of trees: 500
## No. of variables tried at each split: 4
## 
##           Mean of squared residuals: 1.476213
##                     % Var explained: 30.78
# Keep the importance matrix of the fitted forest in x.
x <- gesui.rf$importance

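出力結果の読み方 OOB estimate of error rate:誤判別率 Confusion matrix:行が実際のクラス、列が予測されたクラス。 (注:上の出力はkinkyuudoが数値のまま回帰として実行されているため、誤判別率とConfusion matrixは表示されていません。以下はkinkyuudoを因子に変換し、分類として実行した場合の読み方です。) その例では正解率69.04% 実際に“0”(緊急度3以下)だった218個のうち、“0”と予測されたものが162個、“2”と予測されたものが2個、“3”と予測されたものが54個と読み取れます。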

#重要度順のグラフを出力

# Turn the importance matrix x into a ranking table, most important first.
rank <- data.frame(x)                    # importance matrix -> data frame
rank$factor <- row.names(rank)           # keep the variable names as a column
importance_order <- order(rank[[1]], decreasing = TRUE)
rank <- rank[importance_order, ]         # sort by the first (importance) column
row.names(rank) <- seq_len(nrow(rank))   # row names become the rank position
rank
##    IncNodePurity       factor
## 1     137.118695         long
## 2     132.486026 sitadokaburi
## 3     130.789376        slope
## 4     124.661796   uedokaburi
## 5     116.354171       kansyu
## 6      69.429390        nendo
## 7      62.659995          kei
## 8      47.334756   masuhonsuu
## 9      38.187225    ekijyouka
## 10     19.239761          did
## 11     14.344117       kouhou
## 12      9.550413       taisyo
## 13      5.626238        kubun

重要度順のグラフを出力

# Variable-importance plot for the fitted forest.
varImpPlot(gesui.rf)

# Plot all columns of gesui, coloured by kinkyuudo class.
# The column name was previously misspelled (`kionkyudo`), which evaluates to
# NULL, so no class colours were applied. kinkyuudo is also not safe to use as
# a direct index into a colour vector (a value of 0 would select nothing), so
# map each distinct value to a class code 1, 2, ... and shift it to colours
# 2, 3, ...
class_codes <- as.integer(factor(gesui$kinkyuudo))
plot(gesui, col = class_codes + 1L)

塩ビ対処の判定本番データ

-下水道データ読み込み# 基本統計量表示 gesui # 教科書ではlogit

# Read the data set for this section (enbi.csv); osui2.csv is the disabled
# alternative input.
#gesui <- read_csv("osui2.csv")
gesui <- read_csv("enbi.csv")
## Parsed with column specification:
## cols(
##   OBJECTID = col_double(),
##   sys_name = col_double(),
##   slope = col_double(),
##   uedokaburi = col_double(),
##   masuhonsuu = col_double(),
##   long = col_double(),
##   kubun = col_double(),
##   did = col_double(),
##   kouhou = col_double(),
##   nendo = col_double(),
##   ekijyouka = col_double(),
##   kyouyounensuu = col_double(),
##   kansyu = col_double(),
##   kei = col_double(),
##   kinkyuudo = col_double(),
##   taisyo = col_double()
## )
# Convert the object returned by read_csv() into a plain data.frame
# (the textbook calls this object `logit`).
gesui <- data.frame(gesui)
# (disabled) draw train/test row numbers and write the test rows to disk
#randomgesui<-sample(282,200)
#train <- gesui[randomgesui,]
#test <-gesui[-randomgesui,]
#cat(test$sys_name, file = "testrow.txt",append=FALSE)
#write.table(test,"testoutput.txt", quote=F, 
#             col.names=T, append=T)

# Drop the unused columns by name instead of by position. With the column
# order printed by read_csv(), the former chain of positional drops
# ([-1:-2], [-13], [-8], [-10]) removed exactly these five columns; naming
# them keeps the result correct even if the column order of the CSV changes.
drop_cols <- c("OBJECTID", "sys_name", "kinkyuudo", "nendo", "kansyu")
gesui <- gesui[!names(gesui) %in% drop_cols]

塩ビ管データの基本統計量

# Render the summary-statistics table of the prepared data as HTML.
stargazer(as.data.frame(gesui), type = "html")
Statistic N Mean St. Dev. Min Pctl(25) Pctl(75) Max
slope 282 3.309 2.017 0.000 1.900 4.100 9.900
uedokaburi 282 4.218 2.570 1.009 2.462 5.397 13.385
masuhonsuu 282 1.284 1.765 0 0 2 11
long 282 31.300 15.309 0.970 21.325 40.492 96.820
kubun 282 1.209 0.407 1 1 1 2
did 282 0.766 0.424 0 1 1 1
kouhou 282 0.337 0.473 0 0 1 1
ekijyouka 282 0.202 0.402 0 0 0 1
kyouyounensuu 282 27.514 5.204 10 25 27 40
kei 282 390.248 162.287 200 250 600 900
taisyo 282 0.312 0.464 0 0 1 1

塩ビ管データのカテゴリー変数の指定

# Treat the categorical columns as factors; all other columns keep their type.
gesui <- transform(
  gesui,
  taisyo = as.factor(taisyo),
  kubun = as.factor(kubun),
  did = as.factor(did),
  ekijyouka = as.factor(ekijyouka)
)
# Disabled conversions kept from the original:
#gesui$kansyu <- as.factor(gesui$kansyu)
#gesui$kinkyuudo <- as.factor(gesui$kinkyuudo)

# Show the class of every column to confirm the conversion.
sapply(gesui, class)
##         slope    uedokaburi    masuhonsuu          long         kubun 
##     "numeric"     "numeric"     "numeric"     "numeric"      "factor" 
##           did        kouhou     ekijyouka kyouyounensuu           kei 
##      "factor"     "numeric"      "factor"     "numeric"     "numeric" 
##        taisyo 
##      "factor"
# Print per-column summaries: quantiles and mean for numeric columns, level counts for factors.
summary(gesui)
##      slope         uedokaburi       masuhonsuu          long       kubun  
##  Min.   :0.000   Min.   : 1.009   Min.   : 0.000   Min.   : 0.97   1:223  
##  1st Qu.:1.900   1st Qu.: 2.462   1st Qu.: 0.000   1st Qu.:21.32   2: 59  
##  Median :2.685   Median : 3.402   Median : 1.000   Median :30.06          
##  Mean   :3.309   Mean   : 4.218   Mean   : 1.284   Mean   :31.30          
##  3rd Qu.:4.100   3rd Qu.: 5.397   3rd Qu.: 2.000   3rd Qu.:40.49          
##  Max.   :9.900   Max.   :13.385   Max.   :11.000   Max.   :96.82          
##  did         kouhou       ekijyouka kyouyounensuu        kei        taisyo 
##  0: 66   Min.   :0.0000   0:225     Min.   :10.00   Min.   :200.0   0:194  
##  1:216   1st Qu.:0.0000   1: 57     1st Qu.:25.00   1st Qu.:250.0   1: 88  
##          Median :0.0000             Median :25.00   Median :250.0          
##          Mean   :0.3369             Mean   :27.51   Mean   :390.2          
##          3rd Qu.:1.0000             3rd Qu.:27.00   3rd Qu.:600.0          
##          Max.   :1.0000             Max.   :40.00   Max.   :900.0

学習用データとテストデータの区分化

# Split the prepared data into a 200-row training set and a test set holding
# the remaining rows, then continue with the training rows as `gesui`.
# Indices are drawn from the actual row count instead of a hard-coded 282, so
# the draw cannot point past the end of the data if the CSV changes.
# NOTE(review): no seed is set in this step, so the split differs between runs.
randomgesui <- sample(nrow(gesui), 200)
train <- gesui[randomgesui, ]
test <- gesui[-randomgesui, ]
gesui <- train

#3/26 学習データ数を254、テストデータ数を28に変更

# Re-read the raw CSV for the revised 254/28 split.
# The original first assigned data.frame(gesui) to `data` and overwrote it on
# the very next line with this read; that dead assignment has been removed.
data <- read_csv("enbi.csv")
## Parsed with column specification:
## cols(
##   OBJECTID = col_double(),
##   sys_name = col_double(),
##   slope = col_double(),
##   uedokaburi = col_double(),
##   masuhonsuu = col_double(),
##   long = col_double(),
##   kubun = col_double(),
##   did = col_double(),
##   kouhou = col_double(),
##   nendo = col_double(),
##   ekijyouka = col_double(),
##   kyouyounensuu = col_double(),
##   kansyu = col_double(),
##   kei = col_double(),
##   kinkyuudo = col_double(),
##   taisyo = col_double()
## )
# Convert the result of read_csv() to a plain data.frame.
data <- data.frame(data)
# Fix the RNG seed so the same training rows are drawn on every run.
set.seed(20180924)
# Draw 254 training row indices from the actual row count rather than a
# hard-coded 1:282; for a 282-row file this selects the same rows as before.
sub <- sample(nrow(data), 254)
# NOTE(review): this replaces the previously prepared `gesui` with raw rows, so
# the ID columns are present again and the factor conversions are not applied —
# confirm that the later steps account for this.
gesui <- data[sub, ]
gesui  # print the training rows
##     OBJECTID sys_name slope uedokaburi masuhonsuu  long kubun did kouhou nendo
## 223     1363      223 1.340   3.484858          4 20.20     1   1      0  1991
## 15       151       15 1.740   1.436539          3  9.25     2   1      0  1977
## 180     1237      180 2.130   2.660371          2 29.31     1   1      0  1976
## 214     1354      214 2.320   4.574894          1 30.18     2   1      0  1976
## 57       338       57 3.900   2.796943          2 30.01     2   1      0  1979
## 192     1256      192 2.300   2.346045          2 29.68     1   1      0  1976
## 63       344       63 4.100   1.694001          1 31.01     2   1      0  1979
## 132      979      132 1.100  10.907328          0 22.06     1   1      1  1991
## 149     1087      149 3.200   3.916999          0 40.78     1   0      1  1991
## 215     1355      215 5.600   2.498483          5 26.90     1   1      0  1991
## 267     1578      267 2.450   2.518753          2 26.93     1   1      0  1991
## 245     1458      245 1.010   3.850776          1 24.86     1   1      0  1992
## 168     1212      168 3.300  10.594034          0 30.59     1   1      1  1991
## 30       230       30 5.000   6.649337          0 14.13     1   1      1  1990
## 47       284       47 1.610   9.463612          1 15.54     1   1      0  1991
## 32       232       32 1.580   4.386176          0  5.96     1   1      0  1999
## 97       777       97 3.600   5.855999          1 55.42     1   0      0  1989
## 225     1365      225 1.990   2.754591          3 34.95     2   1      0  1991
## 130      977      130 1.000   6.382495          0 31.11     1   1      1  1990
## 178     1222      178 2.100   5.403999          0 54.79     1   0      1  1991
## 22       193       22 1.500   3.253370          0 13.00     1   0      0  1991
## 92       770       92 1.400   6.330505          0 19.13     1   1      1  1989
## 176     1220      176 3.000   6.259277          0 58.39     1   0      1  1991
## 107      891      107 3.300   4.910003          2 65.07     1   1      1  1989
## 79       447       79 1.820   1.366309          0 23.59     2   1      0  1992
## 139     1077      139 2.100   4.224999          0 50.09     1   0      1  1991
## 212     1352      212 2.120   2.557118          5 32.00     1   1      0  1976
## 18       189       18 2.200   6.027199          0 10.12     1   0      1  1992
## 241     1441      241 3.300   2.621705          3 33.74     1   1      0  1991
## 200     1328      200 3.400   5.916304          0 63.02     1   1      1  1990
## 64       345       64 2.500   1.617001          3 24.86     2   1      0  1979
## 20       191       20 5.000   3.847243          1 15.15     1   0      0  1991
## 183     1240      183 2.040   2.161000          0 29.91     1   1      0  1976
## 16       152       16 4.270   1.472376          0  7.86     2   1      0  1977
## 99       779       99 2.100   3.525528          1 63.57     1   0      0  1989
## 34       243       34 6.500   4.590000          0  9.95     1   0      1  1982
## 156     1103      156 1.800   3.661790          2 28.05     1   0      1  1988
## 113      960      113 4.900   3.828565         11 51.83     2   1      0  1992
## 125      972      125 2.100  10.516000          0 42.60     1   1      1  1991
## 274     1590      274 2.670   2.685897          0 27.49     1   1      0  1991
## 169     1213      169 4.600  10.591164          1 26.39     1   0      1  1991
## 144     1082      144 4.100   3.810922          0 48.17     1   0      1  1991
## 106      874      106 1.900   3.748734          0 42.66     2   0      0  1989
## 211     1351      211 1.480   1.864272          0 20.00     1   1      0  1976
## 166     1207      166 6.900   2.981946          0 34.58     1   1      0  1990
## 161     1201      161 2.800   2.990898          0 54.94     1   0      0  2006
## 82       565       82 4.500   6.974002          0 96.82     1   1      1  1989
## 184     1241      184 2.030   2.351002          0 30.00     1   1      0  1990
## 242     1442      242 6.800   4.806221          0 29.86     1   1      0  1991
## 114      961      114 3.500   3.400039         10 52.24     2   1      0  1992
## 69       350       69 5.500   1.486400          3 26.99     2   1      0  1979
## 121      968      121 1.700  12.168823          0 30.58     1   1      1  1991
## 19       190       19 5.000   6.957029          1 15.65     1   0      1  1991
## 185     1242      185 1.730   1.838435          1 27.80     1   1      0  1990
## 278     1594      278 3.710   2.891690          3 27.74     1   1      0  1991
## 1          6        1 1.220   1.054575          1  3.39     2   1      0  2004
## 252     1550      252 3.190   9.491322          4 19.56     1   1      0  1992
## 170     1214      170 3.200   8.921724          1 52.98     1   1      1  1991
## 164     1204      164 4.100   3.150200          1 50.89     1   0      0  1990
## 126      973      126 2.500   9.882314          0 41.64     1   1      1  1991
## 240     1440      240 1.250   2.002232          6 26.20     1   1      0  1991
## 153     1091      153 2.100   4.563110          0 38.55     1   0      1  1991
## 205     1335      205 4.060   1.982026          4 25.49     1   1      0  1990
## 46       282       46 7.720   9.556511          5 11.01     1   1      0  1991
## 150     1088      150 2.700   3.925999          0 41.25     1   0      1  1991
## 210     1349      210 9.700   2.816253          3 31.90     1   1      0  1991
## 115      962      115 4.120   4.084854          7 51.90     2   1      1  1992
## 135      982      135 1.000   4.740499          0 24.86     1   1      1  1990
## 108      895      108 9.600   2.481001          3 39.08     2   1      0  1990
## 181     1238      181 2.300   1.660551          1 19.08     1   1      0  1976
## 254     1552      254 2.590   9.356372          0 19.97     1   0      0  1991
## 21       192       21 1.500   2.211840          0 14.47     1   0      0  1991
## 231     1421      231 5.400  10.806235          0 40.03     1   0      1  1982
## 155     1102      155 2.400   4.211381          4 59.97     1   0      1  1988
## 187     1244      187 1.880   2.855000          2 30.04     1   1      0  1990
## 91       769       91 1.000   6.574005          1 22.79     1   1      1  1989
## 197     1261      197 2.420   4.470321          1 29.94     2   1      0  1976
## 40       251       40 4.100   2.749357          0 10.02     1   1      0  1991
## 140     1078      140 1.900   4.243001          0 50.01     1   0      1  1991
## 162     1202      162 3.200   2.937999          0 36.44     1   0      0  1990
## 86       763       86 1.400   7.042785          1 40.03     1   1      1  1989
## 190     1253      190 9.200   2.236709          3 33.61     1   1      0  1990
## 206     1336      206 3.230   1.742547          3 30.95     1   1      0  1990
## 202     1332      202 4.020   2.240832          2 31.20     1   1      0  1990
## 237     1435      237 4.210   2.289566          1 23.22     2   1      0  1991
## 85       762       85 1.100   4.548029          0 42.85     1   0      1  1989
## 41       252       41 1.760   2.432278          0  7.87     1   1      0  1991
## 186     1243      186 1.820   2.143259          2 29.91     1   1      0  1990
## 148     1086      148 4.200   4.861001          0 40.29     1   0      1  1991
## 272     1588      272 1.960   2.694672          3 27.25     1   1      0  1991
## 71       429       71 1.060   2.725467          1 29.09     2   1      0  1990
## 110      915      110 3.500   3.403083          4 37.96     2   1      0  1995
## 281     1597      281 4.400   4.937003          0 39.80     1   1      1  1991
## 261     1572      261 3.210   3.068203          6 46.50     1   1      0  1991
## 61       342       61 2.400   1.956000          2 25.15     2   1      0  1979
## 276     1592      276 4.800   4.976580          0 55.17     1   1      1  1991
## 136      983      136 1.100   6.537330          0 47.24     1   1      1  1990
## 262     1573      262 4.300   6.827301          0 51.84     1   1      1  1991
## 188     1245      188 1.790   2.212618          2 30.12     1   1      0  1990
## 93       771       93 5.000   3.920159          1 32.19     1   1      0  1989
## 266     1577      266 2.280   2.663494          2 27.46     1   1      0  1991
## 165     1206      165 3.100   3.199907          0 44.39     1   0      0  1990
## 228     1394      228 2.590   2.475453          0 32.61     1   1      0  1976
## 56       335       56 5.700   3.024835          3 36.10     2   1      0  1976
## 233     1427      233 2.060   4.124568          0 77.03     1   1      1  1991
## 250     1546      250 1.710   9.652271          2 18.98     1   1      0  1991
## 255     1553      255 2.600   9.327517          1 28.02     1   0      0  1991
## 26       198       26 1.690   1.824348          0 10.00     1   1      0  1990
## 173     1217      173 3.000   5.177794          1 18.24     1   0      0  1991
## 277     1593      277 3.670   2.667007          0 27.45     1   1      0  1991
## 44       255       44 2.440   3.414251          0 14.07     2   1      0  1992
## 201     1331      201 3.900   2.064381          2 29.98     1   1      0  1990
## 247     1463      247 4.100   6.122001          0 46.81     1   1      1  1991
## 217     1357      217 8.000   2.589767          3 19.90     1   1      0  1991
## 249     1545      249 3.130   9.776862          2 19.94     1   1      0  1991
## 204     1334      204 4.070   1.891763          1 24.00     1   1      0  1990
## 172     1216      172 3.300   5.519844          1 21.85     1   0      1  1991
## 251     1547      251 1.630   9.552125          2 29.99     1   1      0  1991
## 29       225       29 1.895   3.646695          0  2.71     1   1      0  1989
## 14       146       14 1.300   6.858791          0  2.79     1   1      1  1991
## 159     1199      159 2.900   2.635552          1 48.85     1   0      0  1990
## 65       346       65 3.600   1.572562          1 24.52     2   1      0  1979
## 3         19        3 4.710   1.414000          0  5.02     2   1      0  1992
## 163     1203      163 3.800   2.868704          1 37.59     1   0      0  1990
## 53       293       53 3.580   2.603039          1 16.04     1   1      0  1991
## 112      942      112 2.500   3.922952          3 40.56     1   1      1  1984
## 67       348       67 4.300   1.739730          0 24.93     2   1      0  1979
## 49       289       49 2.730   2.860160          1  7.52     1   1      0  1991
## 220     1360      220 1.600   2.942733          3 31.80     1   1      0  1991
## 208     1344      208 4.010   1.798224          2 29.92     1   1      0  1990
## 128      975      128 3.300   7.612320          0 38.28     1   1      1  1990
## 9        131        9 1.300   1.819819          1  9.12     1   0      0  1989
## 157     1197      157 6.100   3.115186          0 43.01     1   0      0  1990
## 6         23        6 8.900   1.738222          0 15.72     2   1      0  1992
## 101      781      101 5.500   3.339510          0 36.44     1   0      0  1989
## 58       339       58 4.400   2.611828          3 26.75     2   1      0  1979
## 244     1457      244 1.030   5.408001          0 41.07     1   1      1  1992
## 138     1076      138 1.800   4.652454          0 50.20     1   0      1  1991
## 119      966      119 2.300  10.198399          3 45.18     1   1      1  1991
## 131      978      131 8.700   1.926552          4 31.86     2   1      0  1990
## 105      873      105 9.500   1.942961          0 39.40     1   1      0  1989
## 39       250       39 3.900   2.736001          1 12.99     1   1      0  1991
## 117      964      117 2.400   7.072001          2 51.09     1   1      1  1991
## 52       292       52 3.280   3.394147          1 15.78     1   1      0  1991
## 189     1252      189 2.630   1.843763          2 27.15     1   1      0  1976
## 265     1576      265 2.280   2.499810          2 27.46     1   1      0  1991
## 37       248       37 8.700   2.607048          1  8.70     1   1      0  1991
## 207     1337      207 3.830   1.909999          2 28.60     1   1      0  1989
## 68       349       68 4.900   1.586905          2 24.91     2   1      0  1979
## 263     1574      263 5.400   5.521916          0 54.91     1   1      1  1991
## 66       347       66 6.200   1.243001          4 46.92     2   1      0  1988
## 103      785      103 2.600   5.532493          0 31.99     1   0      1  1989
## 258     1560      258 2.960   3.700743          3 26.81     1   1      0  1991
## 23       194       23 1.400   3.677926          1 13.85     1   0      0  1991
## 84       761       84 1.200   4.675212          0 45.45     1   0      1  1989
## 142     1080      142 3.400   4.247999          0 52.10     1   0      1  1991
## 62       343       62 4.500   1.664466          1 26.14     2   1      0  1979
## 280     1596      280 3.710   1.889743          4 31.13     1   1      0  1991
## 43       254       43 3.380   3.460195          1 12.04     2   1      0  1991
## 243     1443      243 5.300   5.008060          0 21.81     1   1      0  1991
## 28       201       28 1.720   2.119883          0 17.00     1   1      0  1976
## 4         20        4 1.100   1.544714          3 13.17     2   1      0  1992
## 268     1579      268 2.900   7.568516          0 54.50     1   1      1  1991
## 54       294       54 1.630   2.476748          4 45.65     1   1      0  1991
## 88       766       88 3.000   6.631325          0 45.98     1   1      1  1989
## 174     1218      174 3.400   2.611999          2 25.26     1   0      0  1991
## 198     1262      198 2.370   3.777865          0 29.87     2   1      0  1976
## 279     1595      279 2.380   4.197623          5 40.25     1   1      0  1991
## 143     1081      143 2.300   4.438878          0 46.81     1   0      1  1991
## 175     1219      175 4.000   3.250242          0 17.93     1   0      0  1991
## 78       446       78 2.120   1.008538          2 30.99     2   1      0  1992
## 182     1239      182 2.140   1.856014          1 29.30     1   1      0  1976
## 74       432       74 4.600   2.853477          0 39.85     2   1      0  1990
## 118      965      118 7.000   7.090262          0 31.00     1   1      1  1991
## 83       679       83 3.000   5.669425          9 40.00     1   1      1  1983
## 222     1362      222 7.800   7.650076          1 21.90     1   1      0  1991
## 196     1260      196 2.940   2.054423          2 28.86     1   1      0  1976
## 218     1358      218 9.900   2.596578          4 32.30     1   1      0  1991
## 273     1589      273 4.700   6.140733          1 54.96     1   1      1  1991
## 264     1575      264 1.990   2.896992          2 27.42     1   1      0  1991
## 271     1582      271 3.350   2.644891          2 36.92     1   1      0  1991
## 100      780      100 4.700   3.494682          1 33.71     1   0      0  1989
## 160     1200      160 1.100   2.491678          1 54.95     1   0      0  1990
## 224     1364      224 2.500   3.318911          4 40.96     1   1      0  1991
## 253     1551      253 1.660   9.424565          4 31.98     1   1      0  1991
## 2          7        2 2.500   1.533001          0  7.78     2   1      0  1988
## 221     1361      221 1.400   7.191373          1 32.10     1   1      0  1991
## 145     1083      145 2.800   5.456998          0 21.43     1   0      1  1991
## 151     1089      151 1.400   4.105612          0 37.92     1   0      1  1991
## 124      971      124 2.400  11.925139          0 34.16     1   1      1  1991
## 77       445       77 1.220   1.225071          0 25.56     2   1      0  1992
## 256     1554      256 3.990   1.993982          1 28.02     1   0      0  1991
## 96       776       96 3.300   3.213998          1 29.89     1   1      0  1989
## 11       141       11 1.300   3.583047          0 10.86     2   1      0  1976
## 232     1422      232 5.200  11.221190          0 44.00     1   0      1  1982
## 73       431       73 4.500   2.714951          0 39.87     2   1      0  1990
## 50       290       50 4.190   2.680456          0  3.50     1   1      0  1991
## 171     1215      171 2.100   8.576775          0 52.11     1   1      1  1991
## 33       233       33 1.428   2.864370          0  7.68     1   1      0  1991
## 90       768       90 3.700   6.981265          0 26.86     1   1      1  1989
## 35       244       35 3.030   6.923674          0  9.21     1   0      1  1982
## 116      963      116 2.700   7.091002          1 41.88     1   1      1  1991
## 75       443       75 1.200   1.428993          0 27.22     2   1      0  1992
## 236     1434      236 3.690   4.178999          2 24.00     1   1      0  1991
## 38       249       38 3.620   3.316654          0 11.03     1   1      0  1991
## 70       351       70 6.300   1.578893          1 20.45     2   1      0  1979
## 104      786      104 2.560   5.077001          0 61.47     1   0      1  1989
## 55       295       55 1.787   2.453790          0  6.24     1   1      0  1991
## 282     1598      282 4.500   5.842998          0 39.55     1   1      1  1991
## 167     1211      167 2.600   8.277668          0 28.97     1   1      1  1991
## 227     1367      227 6.250   2.031941          3 19.73     2   1      0  1991
## 102      784      102 2.300   5.376960          0 39.12     1   0      1  1989
## 10       140       10 1.240   3.505754          0  9.42     2   1      0  1976
## 270     1581      270 1.940   2.559030          1 28.36     1   0      0  1991
## 146     1084      146 2.600   5.497247          0 49.16     1   0      1  1991
## 179     1229      179 7.070   3.457646          1 30.09     1   1      0  1989
## 158     1198      158 4.200   2.784306          0 42.89     1   0      0  1990
## 234     1432      234 2.010   2.395267          1 38.81     1   1      0  1991
## 111      916      111 2.400   3.824999          0 38.06     2   1      0  1995
## 72       430       72 6.700   2.457513          0 28.08     2   1      0  1990
## 89       767       89 1.900   6.749645          0 50.18     1   1      1  1989
## 248     1464      248 2.000   6.358178          0 49.37     1   1      1  1992
## 239     1439      239 8.000   2.291207          0 19.86     1   1      0  1991
## 120      967      120 1.600  11.327685          1 52.05     1   1      1  1991
## 13       143       13 3.200   3.852999          1  9.97     1   1      1  1984
## 24       195       24 7.000   3.632331          0  9.98     1   1      0  1989
## 80       448       80 1.030   1.657000          2 31.00     2   1      0  1992
## 87       764       87 1.800   4.378568          2 53.43     1   1      1  1989
## 60       341       60 4.200   2.061075          1 31.17     2   1      0  1979
## 25       197       25 0.000   1.686499          0  0.97     1   0      0  1985
## 219     1359      219 1.170   2.312506          7 31.70     1   1      0  1991
## 48       288       48 3.770   3.731845          1  8.54     1   1      0  1991
## 76       444       76 7.300   1.251538          0 24.68     2   1      0  1992
## 7         60        7 9.100   2.981000          0 14.26     1   1      0  1982
## 230     1420      230 5.400  10.298999          0 56.50     1   0      1  1982
## 152     1090      152 2.900   3.830998          0 39.16     1   0      1  1991
## 51       291       51 1.170   4.029050          1 10.18     2   1      0  1991
## 199     1325      199 1.400   4.845603          0 77.66     1   1      1  1990
## 194     1258      194 1.700   2.277687          2 30.08     1   1      0  1976
## 229     1395      229 2.590   2.813741          0 32.84     1   1      0  1976
## 275     1591      275 2.560   3.020516          0 27.48     1   1      0  1991
## 45       267       45 1.550  13.384647          3  8.50     1   1      0  1991
## 94       773       94 1.900   4.610782          3 48.88     1   0      1  1989
## 147     1085      147 2.000   5.643243          0 46.96     1   0      1  1991
## 226     1366      226 6.170   2.612946          3 49.95     2   1      0  1991
## 8         61        8 6.300   3.104421          3 15.91     1   1      0  1982
## 81       449       81 9.400   1.665000          1 21.29     2   1      0  1992
## 141     1079      141 2.100   4.467487          0 52.20     1   0      1  1991
## 177     1221      177 2.000   5.180002          0 54.80     1   0      1  1991
## 133      980      133 1.100   5.270930          0 42.22     1   1      1  1990
## 98       778       98 2.800   3.939002          0 55.84     1   0      0  1989
## 12       142       12 0.000   2.633001          0  5.00     2   1      0  1984
## 42       253       42 2.650   2.806566          0 10.02     2   1      0  1992
## 31       231       31 9.400   5.956002          0  5.65     1   1      1  1990
##     ekijyouka kyouyounensuu kansyu kei kinkyuudo taisyo
## 223         0            25      2 250         0      0
## 15          1            39      2 250         0      0
## 180         0            40      2 250         3      1
## 214         0            40      2 250         3      1
## 57          1            37      2 250         3      1
## 192         0            40      2 250         3      1
## 63          0            37      2 250         3      1
## 132         0            25      2 600         0      0
## 149         0            25      2 600         0      0
## 215         0            25      2 250         3      1
## 267         0            25      2 250         0      0
## 245         1            24      2 300         0      0
## 168         0            25      2 600         0      0
## 30          0            26      2 400         0      0
## 47          0            25      2 250         0      0
## 32          0            17      2 400         0      0
## 97          0            27      2 600         2      1
## 225         0            25      2 250         0      0
## 130         1            26      2 600         0      0
## 178         0            25      2 600         0      0
## 22          0            25      2 600         0      0
## 92          0            27      2 600         3      1
## 176         0            25      2 600         3      1
## 107         0            27      2 350         0      0
## 79          1            24      2 250         0      0
## 139         0            25      2 600         0      0
## 212         0            40      2 250         3      1
## 18          0            24      2 600         0      0
## 241         0            25      2 250         3      1
## 200         1            26      2 600         3      1
## 64          0            37      2 250         3      1
## 20          0            25      2 600         0      0
## 183         0            40      2 250         3      1
## 16          1            39      2 250         0      0
## 99          0            27      2 600         0      0
## 34          0            34      2 300         0      0
## 156         1            28      2 500         0      0
## 113         0            24      2 250         0      0
## 125         0            25      2 600         0      0
## 274         0            25      2 250         0      0
## 169         0            25      2 600         3      1
## 144         0            25      2 600         3      1
## 106         0            27      2 300         0      0
## 211         0            40      2 250         3      1
## 166         0            26      2 600         0      0
## 161         0            10      2 600         0      0
## 82          1            27      2 400         0      0
## 184         0            26      2 250         3      1
## 242         0            25      2 250         0      0
## 114         0            24      2 250         0      0
## 69          1            37      2 250         0      0
## 121         0            25      2 600         3      1
## 19          0            25      2 600         3      1
## 185         0            26      2 250         3      1
## 278         0            25      2 250         0      0
## 1           1            12      2 200         0      0
## 252         0            24      2 250         0      0
## 170         0            25      2 600         0      0
## 164         0            26      2 600         0      0
## 126         0            25      2 600         0      0
## 240         0            25      2 250         0      0
## 153         0            25      2 600         0      0
## 205         0            26      2 250         3      1
## 46          0            25      2 250         0      0
## 150         0            25      2 600         0      0
## 210         0            25      2 250         3      1
## 115         0            24      2 250         0      0
## 135         1            26      2 600         0      0
## 108         0            26      2 250         0      0
## 181         0            40      2 250         3      1
## 254         0            25      2 250         0      0
## 21          0            25      2 600         0      0
## 231         0            34      2 350         0      0
## 155         1            28      2 500         3      1
## 187         1            26      2 250         3      1
## 91          0            27      2 600         0      0
## 197         0            40      2 250         3      1
## 40          0            25      2 250         0      0
## 140         0            25      2 600         0      0
## 162         0            26      2 600         0      0
## 86          1            27      2 600         0      0
## 190         0            26      2 250         3      1
## 206         0            26      2 250         3      1
## 202         0            26      2 250         3      1
## 237         0            25      2 250         0      0
## 85          0            27      2 600         3      1
## 41          0            25      2 250         0      0
## 186         1            26      2 250         0      0
## 148         0            25      2 600         0      0
## 272         0            25      2 250         0      0
## 71          0            26      2 250         0      0
## 110         0            21      2 250         0      0
## 281         0            25      2 450         0      0
## 261         0            25      2 450         0      0
## 61          1            37      2 250         3      1
## 276         0            25      2 450         0      0
## 136         1            26      2 600         0      0
## 262         0            25      2 450         0      0
## 188         1            26      2 250         3      1
## 93          1            27      2 600         0      0
## 266         0            25      2 250         0      0
## 165         0            26      2 600         0      0
## 228         0            40      2 450         0      0
## 56          1            40      2 250         3      1
## 233         0            25      2 500         0      0
## 250         0            25      2 250         0      0
## 255         0            25      2 250         0      0
## 26          0            26      2 250         3      1
## 173         0            25      2 600         0      0
## 277         0            25      2 250         0      0
## 44          0            24      2 250         0      0
## 201         0            26      2 250         3      1
## 247         1            25      2 450         0      0
## 217         0            25      2 250         3      1
## 249         0            25      2 250         0      0
## 204         0            26      2 250         3      1
## 172         0            25      2 600         0      0
## 251         0            25      2 250         0      0
## 29          0            27      2 250         0      0
## 14          0            25      2 600         0      0
## 159         0            26      2 600         0      0
## 65          0            37      2 250         2      1
## 3           1            24      2 250         0      0
## 163         0            26      2 600         0      0
## 53          0            25      2 450         0      0
## 112         0            32      2 700         0      0
## 67          0            37      2 250         3      1
## 49          0            25      2 300         0      0
## 220         0            25      2 250         3      1
## 208         0            26      2 250         3      1
## 128         0            26      2 600         0      0
## 9           0            27      2 250         0      0
## 157         0            26      2 600         0      0
## 6           1            24      2 250         0      0
## 101         0            27      2 600         3      1
## 58          1            37      2 250         3      1
## 244         1            24      2 300         0      0
## 138         0            25      2 600         3      1
## 119         0            25      2 600         0      0
## 131         1            26      2 250         0      0
## 105         0            27      2 250         0      0
## 39          0            25      2 250         0      0
## 117         0            25      2 600         3      1
## 52          0            25      2 450         0      0
## 189         0            40      2 250         3      1
## 265         0            25      2 250         0      0
## 37          0            25      2 250         0      0
## 207         0            27      2 250         3      1
## 68          0            37      2 250         3      1
## 263         0            25      2 450         0      0
## 66          1            28      2 250         0      0
## 103         0            27      2 600         0      0
## 258         0            25      2 300         0      0
## 23          0            25      2 600         0      0
## 84          0            27      2 600         0      0
## 142         0            25      2 600         3      1
## 62          0            37      2 250         3      1
## 280         0            25      2 450         3      1
## 43          0            25      2 250         0      0
## 243         0            25      2 250         0      0
## 28          0            40      2 250         3      1
## 4           1            24      2 250         0      0
## 268         0            25      2 450         0      0
## 54          0            25      2 250         0      0
## 88          1            27      2 600         0      0
## 174         0            25      2 600         0      0
## 198         0            40      2 250         3      1
## 279         0            25      2 450         0      0
## 143         0            25      2 600         3      1
## 175         0            25      2 600         0      0
## 78          1            24      2 250         0      0
## 182         0            40      2 250         3      1
## 74          1            26      2 250         0      0
## 118         0            25      2 600         3      1
## 83          0            33      2 900         0      0
## 222         0            25      2 250         2      1
## 196         0            40      2 250         3      1
## 218         0            25      2 250         3      1
## 273         0            25      2 450         0      0
## 264         0            25      2 250         0      0
## 271         0            25      2 450         0      0
## 100         0            27      2 600         3      1
## 160         0            26      2 600         0      0
## 224         0            25      2 250         3      1
## 253         0            25      2 250         0      0
## 2           0            28      2 250         2      1
## 221         0            25      2 250         3      1
## 145         0            25      2 600         0      0
## 151         0            25      2 600         0      0
## 124         0            25      2 600         0      0
## 77          1            24      2 250         0      0
## 256         0            25      2 250         0      0
## 96          0            27      2 600         0      0
## 11          0            40      2 250         0      0
## 232         0            34      2 350         0      0
## 73          1            26      2 250         0      0
## 50          0            25      2 300         0      0
## 171         0            25      2 600         0      0
## 33          0            25      2 400         0      0
## 90          0            27      2 600         0      0
## 35          0            34      2 350         0      0
## 116         0            25      2 600         0      0
## 75          1            24      2 250         0      0
## 236         0            25      2 250         0      0
## 38          0            25      2 250         0      0
## 70          1            37      2 250         0      0
## 104         0            27      2 350         0      0
## 55          0            25      2 450         0      0
## 282         0            25      2 450         0      0
## 167         0            25      2 600         3      1
## 227         0            25      2 250         0      0
## 102         0            27      2 600         3      1
## 10          0            40      2 250         0      0
## 270         0            25      2 250         0      0
## 146         0            25      2 600         3      1
## 179         1            27      2 300         0      0
## 158         0            26      2 600         0      0
## 234         0            25      2 250         0      0
## 111         0            21      2 250         0      0
## 72          1            26      2 250         3      1
## 89          1            27      2 600         0      0
## 248         1            24      2 500         0      0
## 239         0            25      2 250         0      0
## 120         0            25      2 600         0      0
## 13          0            32      2 700         0      0
## 24          1            27      2 300         0      0
## 80          1            24      2 250         0      0
## 87          1            27      2 600         0      0
## 60          1            37      2 250         3      1
## 25          1            31      2 250         0      0
## 219         0            25      2 250         3      1
## 48          0            25      2 300         0      0
## 76          1            24      2 250         0      0
## 7           0            34      2 250         3      1
## 230         0            34      2 350         0      0
## 152         0            25      2 600         0      0
## 51          0            25      2 250         0      0
## 199         1            26      2 600         3      1
## 194         0            40      2 250         3      1
## 229         0            40      2 450         0      0
## 275         0            25      2 250         0      0
## 45          0            25      2 250         0      0
## 94          1            27      2 600         0      0
## 147         0            25      2 600         3      1
## 226         0            25      2 250         0      0
## 8           0            34      2 250         3      1
## 81          1            24      2 250         0      0
## 141         0            25      2 600         0      0
## 177         0            25      2 600         0      0
## 133         1            26      2 600         0      0
## 98          0            27      2 600         3      1
## 12          0            32      2 250         0      0
## 42          0            24      2 200         0      0
## 31          0            26      2 400         0      0
test <-data[-sub,]
test
##     OBJECTID sys_name slope uedokaburi masuhonsuu  long kubun did kouhou nendo
## 5         22        5  1.80   4.412133          1  5.56     2   1      1  1992
## 17       167       17  4.80   5.841930          2 15.45     1   1      1  1991
## 27       200       27  2.12   2.417561          0  9.15     2   1      0  1976
## 36       247       36  2.26   2.336273          0  4.17     1   1      0  1991
## 59       340       59  4.50   2.244000          2 18.98     2   1      0  1979
## 95       775       95  2.10   3.617295          2 31.17     1   1      0  1989
## 109      896      109  3.50   3.160007          1 38.11     2   1      0  1990
## 122      969      122  1.80  12.289001          0 50.19     1   1      1  1991
## 123      970      123  2.40  12.201411          0 35.48     1   1      1  1991
## 127      974      127  1.80   8.155643          0 33.74     1   1      1  1990
## 129      976      129  2.50   5.257328          0 31.86     1   1      1  1990
## 134      981      134  3.90   5.204206          0 34.70     1   1      1  1990
## 137     1004      137  2.56   1.319381          5 29.01     2   1      0  1977
## 154     1101      154  1.90   4.014523          2 57.24     1   0      1  1988
## 191     1255      191  2.37   2.624365          2 29.00     1   1      0  1976
## 193     1257      193  2.05   2.293845          2 29.90     1   1      0  1976
## 195     1259      195  1.75   1.938404          2 32.00     1   1      0  1976
## 203     1333      203  3.73   2.509508          1 31.21     1   1      0  1990
## 209     1348      209  9.40   6.640145          0 32.30     1   1      1  1990
## 213     1353      213  2.06   2.240783          4 32.00     1   1      0  1976
## 216     1356      216  8.90   2.795862          4 25.90     1   1      0  1991
## 235     1433      235  1.11   2.970002          4 25.13     1   1      0  1991
## 238     1438      238  1.59   2.613002          5 33.09     1   1      0  1991
## 246     1462      246  4.10   5.822236          0 46.30     1   1      1  1991
## 257     1555      257  1.45   2.372000          6 33.94     1   0      0  1991
## 259     1561      259  3.07   4.151997          0 20.79     1   1      0  1991
## 260     1571      260  3.06   2.691162          4 32.29     1   1      0  1991
## 269     1580      269  3.00   7.043107          0 55.48     1   1      1  1991
##     ekijyouka kyouyounensuu kansyu kei kinkyuudo taisyo
## 5           1            24      2 250         3      1
## 17          0            25      2 600         3      1
## 27          0            40      2 250         3      1
## 36          0            25      2 250         0      0
## 59          1            37      2 250         3      1
## 95          0            27      2 600         3      1
## 109         0            26      2 250         0      0
## 122         0            25      2 600         0      0
## 123         0            25      2 600         0      0
## 127         0            26      2 600         0      0
## 129         1            26      2 600         3      1
## 134         1            26      2 600         3      1
## 137         1            39      2 250         0      0
## 154         1            28      2 500         0      0
## 191         0            40      2 250         0      0
## 193         0            40      2 250         2      1
## 195         0            40      2 250         3      1
## 203         0            26      2 250         3      1
## 209         0            26      2 400         0      0
## 213         0            40      2 250         3      1
## 216         0            25      2 250         3      1
## 235         0            25      2 250         0      0
## 238         0            25      2 250         0      0
## 246         1            25      2 450         0      0
## 257         0            25      2 250         0      0
## 259         0            25      2 300         0      0
## 260         0            25      2 450         0      0
## 269         0            25      2 450         0      0
# Prepare the training frame (gesui) and the hold-out frame (test) for
# modelling.
#
# Columns are removed by name instead of by position, so the result no longer
# depends on the column order of the input or on the order of the removals.
# The set removed is the same as before: OBJECTID and sys_name, plus
# kinkyuudo, nendo and kansyu.
drop_cols <- c("OBJECTID", "sys_name", "kinkyuudo", "nendo", "kansyu")
gesui <- gesui[setdiff(names(gesui), drop_cols)]
test <- test[setdiff(names(test), drop_cols)]

# The response (taisyo) and the categorical predictors are stored as factors.
factor_cols <- c("taisyo", "kubun", "did", "ekijyouka")
gesui[factor_cols] <- lapply(gesui[factor_cols], as.factor)

# test reuses the level sets of gesui. Building the factors independently
# would give test fewer levels whenever a category happens to be absent from
# the hold-out rows, and predict() rejects new data whose factor levels do not
# match the training data.
test[factor_cols] <- Map(
  function(values, reference) factor(values, levels = levels(reference)),
  test[factor_cols],
  gesui[factor_cols]
)


sapply(gesui, class)
##         slope    uedokaburi    masuhonsuu          long         kubun 
##     "numeric"     "numeric"     "numeric"     "numeric"      "factor" 
##           did        kouhou     ekijyouka kyouyounensuu           kei 
##      "factor"     "numeric"      "factor"     "numeric"     "numeric" 
##        taisyo 
##      "factor"
summary(gesui)
##      slope         uedokaburi       masuhonsuu          long       kubun  
##  Min.   :0.000   Min.   : 1.009   Min.   : 0.000   Min.   : 0.97   1:200  
##  1st Qu.:1.900   1st Qu.: 2.476   1st Qu.: 0.000   1st Qu.:20.66   2: 54  
##  Median :2.765   Median : 3.409   Median : 1.000   Median :30.00          
##  Mean   :3.337   Mean   : 4.206   Mean   : 1.232   Mean   :31.39          
##  3rd Qu.:4.173   3rd Qu.: 5.397   3rd Qu.: 2.000   3rd Qu.:41.04          
##  Max.   :9.900   Max.   :13.385   Max.   :11.000   Max.   :96.82          
##  did         kouhou       ekijyouka kyouyounensuu        kei        taisyo 
##  0: 64   Min.   :0.0000   0:204     Min.   :10.00   Min.   :200.0   0:178  
##  1:190   1st Qu.:0.0000   1: 50     1st Qu.:25.00   1st Qu.:250.0   1: 76  
##          Median :0.0000             Median :25.00   Median :250.0          
##          Mean   :0.3307             Mean   :27.35   Mean   :391.9          
##          3rd Qu.:1.0000             3rd Qu.:27.00   3rd Qu.:600.0          
##          Max.   :1.0000             Max.   :40.00   Max.   :900.0
train <- gesui

塩ビ管の基本統計データ

stargazer(as.data.frame(gesui),type = "html")
Statistic N Mean St. Dev. Min Pctl(25) Pctl(75) Max
slope 254 3.337 2.024 0.000 1.900 4.173 9.900
uedokaburi 254 4.206 2.546 1.009 2.476 5.397 13.385
masuhonsuu 254 1.232 1.748 0 0 2 11
long 254 31.387 15.583 0.970 20.660 41.043 96.820
kouhou 254 0.331 0.471 0 0 1 1
kyouyounensuu 254 27.354 5.069 10 25 27 40
kei 254 391.929 163.533 200 250 600 900

学習データの基本統計データ

stargazer(as.data.frame(train),type = "html")
Statistic N Mean St. Dev. Min Pctl(25) Pctl(75) Max
slope 254 3.337 2.024 0.000 1.900 4.173 9.900
uedokaburi 254 4.206 2.546 1.009 2.476 5.397 13.385
masuhonsuu 254 1.232 1.748 0 0 2 11
long 254 31.387 15.583 0.970 20.660 41.043 96.820
kouhou 254 0.331 0.471 0 0 1 1
kyouyounensuu 254 27.354 5.069 10 25 27 40
kei 254 391.929 163.533 200 250 600 900

推定データの基本統計データ

stargazer(as.data.frame(test),type = "html")
Statistic N Mean St. Dev. Min Pctl(25) Pctl(75) Max
slope 28 3.056 1.964 1.110 1.875 3.558 9.400
uedokaburi 28 4.328 2.828 1.319 2.406 5.399 12.289
masuhonsuu 28 1.750 1.878 0 0 2.5 6
long 28 30.505 12.764 4.170 25.707 34.130 57.240
kouhou 28 0.393 0.497 0 0 1 1
kyouyounensuu 28 28.964 6.221 24 25 30.2 40
kei 28 375.000 152.449 250 250 525 600

塩ビ管の異常判定結果

# Fit a random forest that classifies taisyo from all other columns of train,
# then print the fitted model.
# Variants tried earlier and kept for reference:
#   randomForest(taisyo ~ ., data = gesui)
#   randomForest(kinkyuudo ~ ., data = gesui)
model <- randomForest(taisyo ~ ., data = train)
print(model)
## 
## Call:
##  randomForest(formula = taisyo ~ ., data = train) 
##                Type of random forest: classification
##                      Number of trees: 500
## No. of variables tried at each split: 3
## 
##         OOB estimate of  error rate: 19.29%
## Confusion matrix:
##     0  1 class.error
## 0 167 11  0.06179775
## 1  38 38  0.50000000
#predition = predict(model, gesui)
predition = predict(model, test)
predition
##   5  17  27  36  59  95 109 122 123 127 129 134 137 154 191 193 195 203 209 213 
##   0   0   0   0   1   0   0   0   0   0   0   0   0   0   1   1   1   1   0   1 
## 216 235 238 246 257 259 260 269 
##   1   0   1   0   0   0   0   0 
## Levels: 0 1
summary(predition)
##  0  1 
## 20  8

予測結果と実測値の対比

table(predition,test$taisyo)
##          
## predition  0  1
##         0 14  6
##         1  2  6
#sapply(gesui, class)
#summary(gesui)

モデルの変数重要度

model$importance
##               MeanDecreaseGini
## slope                17.029285
## uedokaburi           21.763576
## masuhonsuu            5.936029
## long                 23.536333
## kubun                 2.052436
## did                   2.518952
## kouhou                1.427450
## ekijyouka             2.976061
## kyouyounensuu        18.142166
## kei                   6.381062
varImpPlot(model)

dim(gesui)
## [1] 254  11
sapply(gesui, class)
##         slope    uedokaburi    masuhonsuu          long         kubun 
##     "numeric"     "numeric"     "numeric"     "numeric"      "factor" 
##           did        kouhou     ekijyouka kyouyounensuu           kei 
##      "factor"     "numeric"      "factor"     "numeric"     "numeric" 
##        taisyo 
##      "factor"
head(gesui)
##     slope uedokaburi masuhonsuu  long kubun did kouhou ekijyouka kyouyounensuu
## 223  1.34   3.484858          4 20.20     1   1      0         0            25
## 15   1.74   1.436539          3  9.25     2   1      0         1            39
## 180  2.13   2.660371          2 29.31     1   1      0         0            40
## 214  2.32   4.574894          1 30.18     2   1      0         0            40
## 57   3.90   2.796943          2 30.01     2   1      0         1            37
## 192  2.30   2.346045          2 29.68     1   1      0         0            40
##     kei taisyo
## 223 250      0
## 15  250      0
## 180 250      1
## 214 250      1
## 57  250      1
## 192 250      1
# Search for the mtry value (number of variables tried at each split) with the
# lowest OOB error. The seed is fixed first so the search is repeatable
# (note 1).
# An earlier variant tuned against kinkyuudo instead of taisyo.
set.seed(123)

gesui.tune <- tuneRF(
  x = gesui[setdiff(names(gesui), "taisyo")], # explanatory variables
  y = gesui$taisyo, # response variable
  doBest = TRUE # also fit a forest with the best mtry found
)
## mtry = 3  OOB error = 18.11% 
## Searching left ...
## mtry = 2     OOB error = 18.11% 
## 0 0.05 
## Searching right ...
## mtry = 6     OOB error = 18.9% 
## -0.04347826 0.05

set.seed(123)#注1 #gesui.tune <- tuneRF(gesui %>% select(-kinkyuudo) ,# 説明変数 # gesui$kinkyuudo, # 目的変数 gesui.tune <- tuneRF(gesui %>% select(-taisyo),# 説明変数 gesui$taisyo,# 目的変数 doBest = T)#分岐に使う変数の数(mtry)を求めるフラグ

この結果,特徴量の個数が 3個のときに,Out-of-Bag誤差(OOB error)は18.11%、 2個のときに,Out-of-Bag誤差は18.11%、 6個のときに,Out-of-Bag誤差は18.9%

となり,特徴量の個数が3個(および2個)のときにOut-of-Bag誤差が最小となり, 3個に設定するのが良さそうであることがわかる*1

構築する決定木の個数を増やしてみる ntreeTry引数はデフォルトでは50となっており,50個の決定木を構築することがわかる.2500個の決定木を構築するように指定してみよう.

# Repeat the mtry search with 2500 trees per candidate value, again from a
# fixed seed (note 1).
# An earlier variant tuned against kinkyuudo instead of taisyo.
set.seed(123)

gesui.tune <- tuneRF(
  x = gesui[setdiff(names(gesui), "taisyo")], # explanatory variables
  y = gesui$taisyo, # response variable
  ntreeTry = 2500, # number of trees grown for each mtry candidate
  trace = TRUE,
  doBest = TRUE
)
## mtry = 3  OOB error = 18.5% 
## Searching left ...
## mtry = 2     OOB error = 18.9% 
## -0.0212766 0.05 
## Searching right ...
## mtry = 6     OOB error = 18.5% 
## 0 0.05

2個のときに,Out-of-Bag誤差(OOB error)が最大となり、18.9%となっている。3個および6個のときは18.5%である。

チューニングで求めたmtry(tuneRF()の結果オブジェクトの$mtryに入っています)は、randomForest()の引数mtryに指定します。

# Build the classifier again, this time with the mtry chosen by the tuning
# step. (An earlier variant used kinkyuudo ~ . as the model formula.)
gesui.rf2 <- randomForest(
  taisyo ~ ., # model formula: taisyo explained by every other column
  data = gesui, # training data
  mtry = gesui.tune$mtry # number of variables tried at each split
)

パラメータチューニング後の推定結果

predrandam = predict(gesui.rf2, test)
predrandam
##   5  17  27  36  59  95 109 122 123 127 129 134 137 154 191 193 195 203 209 213 
##   0   0   0   0   1   0   0   0   0   0   0   0   0   0   1   1   1   1   0   1 
## 216 235 238 246 257 259 260 269 
##   1   0   1   0   0   0   0   0 
## Levels: 0 1
summary(predrandam)
##  0  1 
## 20  8
table(predrandam,test$taisyo)
##           
## predrandam  0  1
##          0 14  6
##          1  2  6

下水劣化推定変数の重要度

gesui.rf2
## 
## Call:
##  randomForest(formula = taisyo ~ ., data = gesui, mtry = gesui.tune$mtry) 
##                Type of random forest: classification
##                      Number of trees: 500
## No. of variables tried at each split: 3
## 
##         OOB estimate of  error rate: 17.72%
## Confusion matrix:
##     0  1 class.error
## 0 168 10  0.05617978
## 1  35 41  0.46052632
x=gesui.rf2$importance
x
##               MeanDecreaseGini
## slope                17.630621
## uedokaburi           21.741710
## masuhonsuu            6.025033
## long                 21.856484
## kubun                 2.095913
## did                   2.514011
## kouhou                1.421411
## ekijyouka             3.103169
## kyouyounensuu        18.940177
## kei                   6.265901

http://sfchaos.hatenablog.com/entry/20150628/p1 https://tjo.hatenablog.com/entry/2013/09/02/190449

出力結果の読み方 OOB estimate of error rate:誤判別率 Confusion matrix:行が実際のクラス、列が予測されたクラス。上の例では実際に"0"(緊急度3以下)だった178個のうち、"0"と予測されたものが168個、"1"と予測されたものが10個と読み取れます。

重要度の高い順番に並び替え

# Build a ranking table from the importance matrix x: one row per variable,
# ordered from most to least important, with the rank as the row name.
rank <- data.frame(x) # importance matrix -> data frame
rank$factor <- rownames(rank) # keep the variable names as a column
# Order by the first column (the importance measure), largest first.
rank <- rank[order(rank[[1]], decreasing = TRUE), ]
# seq_len() stays valid for an empty table, where 1:nrow() would yield c(1, 0).
rownames(rank) <- seq_len(nrow(rank))
rank
##    MeanDecreaseGini        factor
## 1         21.856484          long
## 2         21.741710    uedokaburi
## 3         18.940177 kyouyounensuu
## 4         17.630621         slope
## 5          6.265901           kei
## 6          6.025033    masuhonsuu
## 7          3.103169     ekijyouka
## 8          2.514011           did
## 9          2.095913         kubun
## 10         1.421411        kouhou
plot(gesui.rf2)

varImpPlot(gesui.rf2)

参考 https://yolo-kiyoshi.com/2019/09/16/post-1226/ https://aotamasaki.hatenablog.com/entry/bias_in_feature_importances

# 別のサイトでのランダムフォレストによるEDAをRで実践 https://navaclass.com/random-forest-eda/

#set.seed(111)
#ランダムフォレストモデルの学習
#boston.rf <- randomForest(kinkyuudo ~ .,
#boston.rf <- randomForest(taisyo ~ .,                          
#                          data = train,
#                          importance = TRUE)

#テストデータに対する予測
#pred <- predict(boston.rf, newdata = test)

#観測値と予測値をプロット
#plot(test$taisyo, pred, main = boston.rf$call)
#curve(identity, add = TRUE)

# Predict taisyo for every row of test with the tuned forest gesui.rf2.
pred = predict(gesui.rf2, test)
# Plot the predictions against the observed values; the title is the model call.
# NOTE(review): this plot/curve pair mirrors the commented-out regression
# example above. taisyo is a factor here, so the identity line may not be
# meaningful on this plot - confirm.
plot(test$taisyo, pred, main = gesui.rf2$call)
curve(identity, add = TRUE)

# Prediction error (RMSE: root mean squared error).
# The response is a factor, so it is converted to numeric before the error is
# computed.

#' Root mean squared error between observed and predicted values.
#'
#' Factor and character inputs are converted through their labels (not the
#' internal factor codes), so class labels such as "0"/"1" are compared as the
#' numbers 0/1. Subtracting factors directly yields NA with a warning.
#'
#' @param act Observed values: numeric, or a factor/character vector whose
#'   labels are numbers.
#' @param pred Predicted values, in the same form and length as `act`.
#' @return A single numeric value, the RMSE.
rms <- function(act, pred) {
  as_number <- function(values) {
    if (is.factor(values)) {
      values <- as.character(values) # use the labels, not the level codes
    }
    as.numeric(values)
  }
  sqrt(mean((as_number(act) - as_number(pred))^2))
}
cat(" RMSE =", rms(test$taisyo, pred))
## Warning in Ops.factor(act, pred): '-' not meaningful for factors
##  RMSE = NA
# Compare with the prediction error of a linear regression model.
# lm() needs a numeric response, so the 0/1 labels of the factor taisyo are
# converted to numbers for both fitting and scoring. Fitting on the factor
# itself produced warnings and an NA error value.
cat(
  " RMSE = ",
  local({
    label_to_number <- function(labels) as.numeric(as.character(labels))
    train_numeric <- transform(train, taisyo = label_to_number(taisyo))
    lm_fit <- lm(taisyo ~ ., data = train_numeric)
    rms(label_to_number(test$taisyo), predict(lm_fit, newdata = test))
  })
)
## Warning in model.response(mf, "numeric"): using type = "numeric" with a factor
## response will be ignored
## Warning in Ops.factor(y, z$residuals): '-' not meaningful for factors
## Warning in Ops.factor(act, pred): '-' not meaningful for factors
##  RMSE =  NA

https://funatsu-lab.github.io/open-course-ware/machine-learning/random-forest/

# Bar chart of feature importance, largest first.
# (The original note here read "type = 1".)
# sort() applied to the importance matrix drops its row names, which left the
# bars unlabelled. Extracting the MeanDecreaseGini column first gives a named
# vector, and the names survive sorting.
boston.imp <- sort(
  gesui.rf2$importance[, "MeanDecreaseGini"],
  decreasing = TRUE
)
barplot(boston.imp, names.arg = names(boston.imp))

SVMによる予測

ビジネスに活かすデータマイニング(尾崎豊) http://yut.hatenablog.com/entry/20120827/1346024147

stargazer(as.data.frame(gesui),type = "html")
Statistic N Mean St. Dev. Min Pctl(25) Pctl(75) Max
slope 254 3.337 2.024 0.000 1.900 4.173 9.900
uedokaburi 254 4.206 2.546 1.009 2.476 5.397 13.385
masuhonsuu 254 1.232 1.748 0 0 2 11
long 254 31.387 15.583 0.970 20.660 41.043 96.820
kouhou 254 0.331 0.471 0 0 1 1
kyouyounensuu 254 27.354 5.069 10 25 27 40
kei 254 391.929 163.533 200 250 600 900
library(e1071)

d.svm<-svm(taisyo ~ ., data = train)
print(d.svm)
## 
## Call:
## svm(formula = taisyo ~ ., data = train)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  radial 
##        cost:  1 
## 
## Number of Support Vectors:  140
predsvm<-predict(d.svm,newdata=test)
summary(predsvm)
##  0  1 
## 20  8

予測結果と実測値の対比

kekka<-table(predsvm,test$taisyo)
kekka
##        
## predsvm  0  1
##       0 14  6
##       1  2  6

ニューラルネットワーク法による推定

library( nnet )

予測式

nn<-nnet(taisyo ~., data=train,size = 2, rang = .1, decay = 5e-4, maxit = 200 )
## # weights:  25
## initial  value 166.866381 
## iter  10 value 139.788985
## iter  20 value 122.976109
## iter  30 value 121.705225
## iter  40 value 121.700819
## iter  50 value 121.693387
## iter  60 value 121.687739
## iter  70 value 121.597941
## iter  80 value 120.776591
## iter  90 value 110.107610
## iter 100 value 108.928885
## iter 110 value 106.747153
## iter 120 value 106.273506
## iter 130 value 106.162321
## iter 140 value 105.967551
## iter 150 value 105.795665
## iter 160 value 105.726490
## iter 170 value 105.666118
## iter 180 value 105.579543
## iter 190 value 105.552047
## iter 200 value 105.546545
## final  value 105.546545 
## stopped after 200 iterations
nn
## a 10-2-1 network with 25 weights
## inputs: slope uedokaburi masuhonsuu long kubun2 did1 kouhou ekijyouka1 kyouyounensuu kei 
## output(s): taisyo 
## options were - entropy fitting  decay=5e-04
nn_predict<-predict(nn,test,type="class")
table(nn_predict, test$taisyo)
##           
## nn_predict  0  1
##          0 14  9
##          1  2  3
# Write the observed classes and the neural-network predictions to text files.
# test$taisyo is a factor and cat() writes a factor as its integer codes (1/2),
# so it is converted to character first. Both files then hold the class
# labels (0/1), matching the labels stored in nn_predict.
cat(as.character(test$taisyo), file = "testtaisyo2.txt", append = FALSE)
cat(nn_predict, file = "nnresult2.txt", append = FALSE)

nn_predict<-predict(nn,test,type="raw") nn_predict#推定値の生データ出力:https://mjin.doshisha.ac.jp/R/Chap_23/23.html nn_predict<-predict(nn,test,type="class")#推定値のグループ出力 #推定値グループのファイルテキスト出力http://takenaka-akio.org/doc/r_auto/chapter_03.html nn_predict cat(test$taisyo, file = "testtaisyo.txt", append =FALSE) cat(predrandam, file = "lfresult.txt", append =FALSE) cat(predsvm, file = "svmresult.txt", append =FALSE) cat(nn_predict, file = "nnresult.txt", append =FALSE) kekka<-table(nn_predict, test$taisyo) kekka