#3/26 学習データ数を254、テストデータ数を28に変更 参考資料
https://qiita.com/nkjm/items/e751e49c7d2c619cbeab
https://momonoki2017.blogspot.com/2018/04/r007-riris.html
http://d-m-l.jp/Rbiz/task_rf.html
https://funatsu-lab.github.io/open-course-ware/machine-learning/random-forest/
http://takenaka-akio.org/doc/r_auto/chapter_03.html
http://yut.hatenablog.com/entry/20120827/1346024147
https://mjin.doshisha.ac.jp/R/Chap_23/23.html
https://qiita.com/TsutomuNakamura/items/a1a6a02cb9bb0dcbb37f 混同行列(Confusion Matrix) とは
http://d-m-l.jp/Rbiz/task_rf.html ランダムフォレストとは機械学習のアルゴリズムの1つで、学習用のデータをランダムにサンプリングして多数の決定木を作成し、作成した決定木をもとに多数決で結果を決める方法です。精度、汎用性が高く扱いやすい分析手法です。
ランダムフォレストの特徴
#ランダムフォレストで使用するデータ - Titanics.rpart - Titanic - Titanichはtraingが統計処理されたデータでありこの演習には不向き - cordataは、グラフィック用に処理されたデータでありtrainのPclasswを3区分したり、sexを2区分するなど一部質的化したが、Fareh・年齢は量的データのままであり、氏名はそのままであり、欠落のあるデータは補完してある。 - ダミー変数ummy_varn等はカテゴリーデータをintegerデータに置き換えたものであり以下の論点に合わないらしいので使わない - lldataを使っても良いが、(makedummies()を使用してダミー変数)を実施する前のdumとnot_dum結合した、 - train2を使用する
#randomForestではCharacterは使わないようにしよう http://ushi-goroshi.hatenablog.com/entry/2019/01/30/171259
library(car)
## Loading required package: carData
library(caret)
## Warning: package 'caret' was built under R version 3.6.2
## Loading required package: lattice
library(cluster)
library(dummies)
## dummies-1.5.6 provided by Decision Patterns
library(data.table)
## Warning: package 'data.table' was built under R version 3.6.2
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
##
## between, first, last
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(e1071)
## Warning: package 'e1071' was built under R version 3.6.2
library(epitools)
library(effects)
## Warning: package 'effects' was built under R version 3.6.3
## Registered S3 methods overwritten by 'lme4':
## method from
## cooks.distance.influence.merMod car
## influence.merMod car
## dfbeta.influence.merMod car
## dfbetas.influence.merMod car
## Use the command
## lattice::trellis.par.set(effectsTheme())
## to customize lattice options for effects plots.
## See ?effectsTheme for details.
library(ggplot2)
library(ggthemes)
## Warning: package 'ggthemes' was built under R version 3.6.2
library(randomForest)
## Warning: package 'randomForest' was built under R version 3.6.3
## randomForest 4.6-14
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:dplyr':
##
## combine
## The following object is masked from 'package:ggplot2':
##
## margin
library(ranger)
## Warning: package 'ranger' was built under R version 3.6.2
##
## Attaching package: 'ranger'
## The following object is masked from 'package:randomForest':
##
## importance
library(rgl)
library(rattle)
## Warning: package 'rattle' was built under R version 3.6.2
## Rattle: A free graphical interface for data science with R.
## バージョン 5.3.0 Copyright (c) 2006-2018 Togaware Pty Ltd.
## 'rattle()' と入力して、データを多角的に分析します。
##
## Attaching package: 'rattle'
## The following object is masked from 'package:ranger':
##
## importance
## The following object is masked from 'package:randomForest':
##
## importance
library(readr)
## Warning: package 'readr' was built under R version 3.6.2
library(rpart.plot)
## Warning: package 'rpart.plot' was built under R version 3.6.2
## Loading required package: rpart
library(rpart)
library(readr)
library(reshape)
## Warning: package 'reshape' was built under R version 3.6.2
##
## Attaching package: 'reshape'
## The following object is masked from 'package:dplyr':
##
## rename
## The following object is masked from 'package:data.table':
##
## melt
library(rsconnect)
## Warning: package 'rsconnect' was built under R version 3.6.2
library(reshape2)
##
## Attaching package: 'reshape2'
## The following objects are masked from 'package:reshape':
##
## colsplit, melt, recast
## The following objects are masked from 'package:data.table':
##
## dcast, melt
library(tidyr)
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:reshape2':
##
## smiths
## The following objects are masked from 'package:reshape':
##
## expand, smiths
library(xtable)
library(nnet)
## Warning: package 'nnet' was built under R version 3.6.2
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(randomForest)
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.6.2
## -- Attaching packages ------------------------------------------------------------------------------ tidyverse 1.3.0 --
## √ tibble 2.1.3 √ stringr 1.4.0
## √ purrr 0.3.3 √ forcats 0.4.0
## Warning: package 'stringr' was built under R version 3.6.2
## Warning: package 'forcats' was built under R version 3.6.2
## -- Conflicts --------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::between() masks data.table::between()
## x randomForest::combine() masks dplyr::combine()
## x tidyr::expand() masks reshape::expand()
## x dplyr::filter() masks stats::filter()
## x dplyr::first() masks data.table::first()
## x dplyr::lag() masks stats::lag()
## x dplyr::last() masks data.table::last()
## x purrr::lift() masks caret::lift()
## x randomForest::margin() masks ggplot2::margin()
## x dplyr::recode() masks car::recode()
## x reshape::rename() masks dplyr::rename()
## x purrr::some() masks car::some()
## x purrr::transpose() masks data.table::transpose()
gesui = read_csv("osui.csv")
## Parsed with column specification:
## cols(
## OBJECTID = col_double(),
## sys_name = col_double(),
## slope = col_double(),
## uedokaburi = col_double(),
## masuhonsuu = col_double(),
## long = col_double(),
## kubun = col_double(),
## did = col_double(),
## kouhou = col_double(),
## nendo = col_double(),
## ekijyouka = col_double(),
## kyouyounensuu = col_double(),
## kansyu = col_double(),
## kei = col_double(),
## kinkyuudo = col_double(),
## taisyo = col_double()
## )
gesui<- data.frame(gesui) # 教科書ではlogit
#OBJECTID列をデータから削除
gesui <- gesui[-1:-2]
stargazer(as.data.frame(gesui),type = "html")
Statistic | N | Mean | St. Dev. | Min | Pctl(25) | Pctl(75) | Max |
slope | 1,423 | 3.437 | 2.323 | -6 | 1.8 | 4.5 | 10 |
uedokaburi | 1,423 | 4.371 | 2.475 | 0.360 | 2.727 | 4.949 | 13.863 |
masuhonsuu | 1,423 | 1.338 | 1.808 | 0 | 0 | 2 | 13 |
long | 1,423 | 35.129 | 18.972 | 0.970 | 21.445 | 46.510 | 196.280 |
kubun | 1,423 | 1.204 | 0.403 | 1 | 1 | 1 | 2 |
did | 1,423 | 0.696 | 0.460 | 0 | 0 | 1 | 1 |
kouhou | 1,423 | 0.415 | 0.493 | 0 | 0 | 1 | 1 |
nendo | 1,423 | 1,982.967 | 5.973 | 1,974 | 1,978 | 1,990 | 2,006 |
ekijyouka | 1,423 | 0.396 | 0.611 | 0 | 0 | 1 | 4 |
kyouyounensuu | 1,423 | 33.033 | 5.973 | 10 | 26 | 38 | 42 |
kansyu | 1,423 | 1.198 | 0.399 | 1 | 1 | 1 | 2 |
kei | 1,423 | 517.182 | 308.765 | 200 | 250 | 800 | 1,650 |
kinkyuudo | 1,423 | 1.297 | 1.457 | 0 | 0 | 3 | 3 |
taisyo | 1,423 | 0.448 | 0.497 | 0 | 0 | 1 | 1 |
gesui$kansyu <- as.factor(gesui$kansyu)
gesui$taisyo <- as.factor(gesui$taisyo)
gesui$kubun <- as.factor(gesui$kubun)
gesui$did <- as.factor(gesui$did)
gesui$ekijyouka <- as.factor(gesui$ekijyouka)
stargazer(as.data.frame(gesui),type = "html")
Statistic | N | Mean | St. Dev. | Min | Pctl(25) | Pctl(75) | Max |
slope | 1,423 | 3.437 | 2.323 | -6 | 1.8 | 4.5 | 10 |
uedokaburi | 1,423 | 4.371 | 2.475 | 0.360 | 2.727 | 4.949 | 13.863 |
masuhonsuu | 1,423 | 1.338 | 1.808 | 0 | 0 | 2 | 13 |
long | 1,423 | 35.129 | 18.972 | 0.970 | 21.445 | 46.510 | 196.280 |
kouhou | 1,423 | 0.415 | 0.493 | 0 | 0 | 1 | 1 |
nendo | 1,423 | 1,982.967 | 5.973 | 1,974 | 1,978 | 1,990 | 2,006 |
kyouyounensuu | 1,423 | 33.033 | 5.973 | 10 | 26 | 38 | 42 |
kei | 1,423 | 517.182 | 308.765 | 200 | 250 | 800 | 1,650 |
kinkyuudo | 1,423 | 1.297 | 1.457 | 0 | 0 | 3 | 3 |
exclude_cols = c("OBJECTID","kinkyuudo")
gesui = gesui[ !names(gesui) %in% exclude_cols ]
randomgesui<-sample(1281,142)
test <- gesui[randomgesui,]
train <-gesui[-randomgesui,]
gesui <- train
model = randomForest(taisyo ~ ., data = gesui)
model
##
## Call:
## randomForest(formula = taisyo ~ ., data = gesui)
## Type of random forest: classification
## Number of trees: 500
## No. of variables tried at each split: 3
##
## OOB estimate of error rate: 25.84%
## Confusion matrix:
## 0 1 class.error
## 0 560 147 0.2079208
## 1 184 390 0.3205575
predition = predict(model, gesui)
predition
## 2 3 4 6 7 8 9 10 12 13 14 15 16 17 18 19
## 1 1 0 0 1 1 1 1 0 1 1 0 0 0 1 1
## 21 22 23 24 25 26 27 28 29 30 32 33 34 35 36 37
## 0 1 1 1 1 0 0 1 1 0 1 1 1 0 0 1
## 38 39 40 41 42 43 45 46 47 48 49 51 52 53 54 55
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0
## 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
## 0 0 1 0 1 1 1 0 0 0 1 1 1 1 0 0
## 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87
## 1 0 0 1 1 0 0 1 0 1 1 1 1 1 1 0
## 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103
## 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
## 104 105 106 107 108 109 110 111 112 113 115 116 117 118 119 120
## 1 0 0 0 1 0 0 1 0 1 1 0 0 1 0 1
## 121 122 123 124 125 126 127 128 129 130 131 132 133 135 136 137
## 0 0 0 1 0 0 0 0 1 0 0 0 0 1 0 1
## 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 154
## 1 1 1 0 0 0 0 0 0 0 1 0 0 0 0 0
## 155 156 157 160 162 163 164 165 166 167 168 169 170 172 173 174
## 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0
## 176 177 178 179 180 181 182 183 184 185 186 187 189 190 191 192
## 0 0 0 0 0 0 1 1 1 0 1 0 0 0 0 0
## 193 194 195 196 198 199 200 201 202 203 204 205 206 207 209 210
## 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0
## 211 212 213 214 215 216 219 220 222 223 224 225 226 227 228 229
## 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 231 232 233 234 235 236 237 239 240 242 243 244 245 246 247 249
## 1 1 0 1 1 1 1 1 0 1 1 1 1 1 1 1
## 250 251 252 253 254 255 256 258 259 260 261 262 263 265 266 267
## 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 0
## 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283
## 1 1 1 1 1 0 0 0 1 1 1 1 0 1 1 1
## 284 285 286 287 288 289 290 291 292 293 295 297 298 299 300 301
## 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1 0
## 302 304 305 306 307 308 310 311 312 313 314 315 316 317 318 319
## 0 1 0 1 0 0 0 0 0 1 0 0 0 0 1 1
## 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335
## 1 1 1 1 1 1 0 0 0 0 0 0 0 1 1 1
## 336 338 339 340 341 342 345 346 347 348 349 350 351 352 353 354
## 1 1 0 1 0 1 1 1 0 1 1 1 0 1 1 1
## 355 356 359 360 361 362 363 365 366 367 368 369 370 371 372 373
## 1 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
## 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389
## 1 1 1 1 1 1 1 1 1 0 0 0 1 1 1 1
## 390 392 393 394 395 396 398 399 400 401 402 403 404 405 406 407
## 1 1 0 1 1 0 1 1 1 1 0 1 1 1 0 1
## 408 410 411 412 414 415 416 417 418 420 421 422 423 424 425 426
## 0 0 0 0 0 0 0 0 0 1 0 1 1 0 1 0
## 429 432 433 434 435 436 437 438 440 441 442 444 445 446 447 448
## 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 1
## 449 450 452 453 454 455 456 457 458 459 460 462 463 464 465 466
## 0 1 0 1 1 1 1 1 1 1 1 0 0 0 0 1
## 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482
## 1 0 0 0 1 0 1 1 0 0 0 0 0 1 1 1
## 483 484 485 486 487 488 489 490 492 493 494 495 496 497 498 499
## 1 1 1 1 1 1 0 0 1 1 1 1 0 1 1 1
## 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515
## 0 1 0 1 1 1 0 1 0 1 0 1 1 1 1 0
## 516 517 518 520 521 522 523 524 525 526 527 528 529 530 531 532
## 1 1 0 0 0 1 1 1 1 1 1 1 1 0 0 1
## 533 535 536 537 539 540 541 542 543 544 546 547 548 549 550 551
## 1 1 1 1 1 1 1 0 1 1 1 0 0 1 0 0
## 553 554 555 556 557 558 560 561 562 563 564 565 567 568 569 570
## 0 0 0 1 0 1 0 0 0 0 0 0 1 1 0 0
## 571 572 573 574 575 577 578 579 580 581 582 583 584 585 586 587
## 1 0 0 0 0 0 0 1 0 0 0 1 0 1 1 0
## 588 590 591 592 594 595 596 597 598 599 600 601 602 603 604 605
## 1 0 0 1 0 1 0 0 0 0 0 0 0 1 0 1
## 606 607 608 609 610 611 612 613 614 618 619 621 622 623 624 625
## 1 1 0 0 0 0 1 1 1 1 1 1 1 1 1 1
## 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642
## 1 1 0 1 0 0 1 0 1 0 0 0 0 1 1 0
## 643 644 645 646 647 649 650 651 652 653 654 655 656 657 658 660
## 1 1 0 0 1 0 0 0 1 0 1 1 1 1 1 1
## 661 662 663 664 665 666 667 668 669 671 672 673 674 675 676 677
## 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0
## 678 679 680 681 682 683 684 685 686 687 689 690 691 692 693 694
## 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
## 695 696 697 698 699 700 701 704 706 707 708 709 711 712 713 715
## 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
## 716 717 719 720 721 722 723 724 725 726 727 728 729 730 731 732
## 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 733 734 735 736 737 738 739 740 741 743 744 745 746 747 748 749
## 0 1 0 0 0 0 0 1 1 1 0 0 0 1 0 0
## 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765
## 0 0 0 0 0 1 1 0 0 1 0 0 0 0 0 0
## 766 768 769 770 771 772 773 776 777 778 780 781 782 783 784 785
## 0 0 0 0 0 1 0 0 1 0 0 0 0 0 1 0
## 786 787 788 790 793 794 795 796 797 798 799 800 801 802 803 804
## 1 0 0 0 0 0 0 1 1 1 0 1 1 1 1 1
## 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1
## 821 822 823 824 825 826 828 829 831 832 833 834 835 836 837 838
## 0 0 0 1 0 0 0 1 0 1 0 0 0 0 0 0
## 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854
## 0 0 0 0 0 0 0 0 0 1 0 0 0 1 1 1
## 855 856 857 858 859 860 861 862 863 865 866 867 868 869 870 871
## 0 1 1 0 0 0 0 0 0 0 0 0 0 1 1 0
## 873 874 876 877 878 879 881 882 883 884 885 886 887 888 889 890
## 0 1 1 1 0 0 0 0 1 0 1 1 0 1 1 1
## 891 892 893 894 895 896 897 898 901 902 903 904 905 906 907 908
## 1 1 1 1 0 0 0 1 1 1 1 1 0 0 1 1
## 909 910 911 912 913 914 915 916 917 918 919 920 921 923 924 925
## 1 1 1 1 1 1 1 1 1 1 1 0 1 1 0 1
## 926 927 930 931 932 933 934 935 937 938 939 940 941 942 943 944
## 1 0 1 1 0 1 1 1 0 0 1 1 1 0 1 0
## 945 946 947 949 950 951 952 953 955 956 957 959 960 961 962 964
## 1 0 0 1 1 0 0 0 0 0 0 0 0 0 0 1
## 966 967 968 969 970 971 972 973 974 975 976 977 978 980 981 983
## 1 1 0 1 0 0 0 0 0 0 1 0 0 0 0 0
## 984 985 987 988 989 990 992 993 994 995 996 997 998 999 1000 1001
## 1 0 0 1 1 1 1 1 1 1 1 1 1 0 1 1
## 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1016 1017 1018
## 1 0 1 0 0 0 1 1 1 1 1 1 1 1 1 0
## 1019 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035
## 0 0 1 1 0 0 0 0 1 0 1 0 0 0 0 0
## 1036 1038 1040 1041 1042 1043 1044 1046 1047 1048 1049 1050 1052 1053 1054 1055
## 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0
## 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1069 1070 1071 1072 1073
## 0 0 0 0 1 0 0 1 1 0 1 0 1 1 1 1
## 1074 1075 1076 1078 1079 1082 1083 1085 1086 1087 1088 1089 1090 1091 1092 1093
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0
## 1094 1096 1098 1099 1100 1101 1102 1103 1104 1105 1106 1108 1109 1110 1112 1113
## 1 1 1 1 1 1 1 1 1 1 1 0 1 0 0 0
## 1114 1115 1116 1117 1118 1119 1120 1122 1123 1124 1125 1126 1127 1128 1129 1130
## 1 1 1 1 1 1 0 1 1 0 0 1 1 1 1 1
## 1131 1132 1133 1135 1136 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148
## 1 1 0 1 0 0 1 1 1 0 0 0 0 0 0 1
## 1149 1151 1152 1154 1155 1156 1157 1158 1159 1160 1162 1163 1164 1165 1167 1168
## 1 0 1 0 1 0 0 1 0 0 0 0 0 0 1 0
## 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1181 1183 1184 1186 1187
## 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0
## 1188 1189 1190 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204
## 1 1 1 0 0 1 0 1 0 0 0 1 0 1 1 1
## 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1216 1217 1218 1219 1221 1222
## 1 1 1 0 0 0 1 1 1 1 0 0 0 1 0 0
## 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1236 1237 1238 1239
## 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0
## 1240 1241 1243 1244 1245 1246 1247 1248 1250 1251 1252 1253 1254 1255 1256 1257
## 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1
## 1258 1259 1260 1263 1265 1266 1267 1268 1270 1271 1272 1273 1274 1275 1276 1277
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1278 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294
## 0 0 1 1 0 0 0 0 0 0 0 1 0 1 0 0
## 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310
## 0 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0
## 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326
## 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0
## 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342
## 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358
## 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0
## 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374
## 0 0 0 1 1 0 0 0 0 1 0 0 0 0 0 0
## 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390
## 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406
## 0 0 0 1 0 0 0 0 1 0 1 1 0 0 0 0
## 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422
## 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0
## 1423
## 0
## Levels: 0 1
names(gesui)
## [1] "slope" "uedokaburi" "masuhonsuu" "long"
## [5] "kubun" "did" "kouhou" "nendo"
## [9] "ekijyouka" "kyouyounensuu" "kansyu" "kei"
## [13] "taisyo"
#参考資料ではimportance(model1)で変数の重みが算出される事になっているが、実際にはmodel1$importancedでないと 算出できない。
model$importance
## MeanDecreaseGini
## slope 96.03669
## uedokaburi 110.54773
## masuhonsuu 39.34200
## long 104.98965
## kubun 10.28200
## did 15.27289
## kouhou 12.58609
## nendo 52.80419
## ekijyouka 23.82998
## kyouyounensuu 51.39913
## kansyu 19.07822
## kei 51.43232
varImpPlot(model)
ランダムフォレストチューニング(データ検証) http://d-m-l.jp/Rbiz/task_rf.html
http://sfchaos.hatenablog.com/entry/20150628/p1
#注1:set.seed(123)乱数発生 ttps://qiita.com/aich_08_/items/6d885c91c9d461514018
まずは単純にtuneRF関数を実行してみる まずは特別な設定を行わずにtuneRF関数を実行してみよう.tuneRF関数の第1引数には説明変数,第2引数には目的変数を指定する.また,doBest引数をTRUEに指定すると,評価が最も良いモデルを返すようになる.
dim(gesui)
## [1] 1281 13
sapply(gesui, class)
## slope uedokaburi masuhonsuu long kubun
## "numeric" "numeric" "numeric" "numeric" "factor"
## did kouhou nendo ekijyouka kyouyounensuu
## "factor" "numeric" "numeric" "factor" "numeric"
## kansyu kei taisyo
## "factor" "numeric" "factor"
head(gesui)
## slope uedokaburi masuhonsuu long kubun did kouhou nendo ekijyouka
## 2 3.90 3.763000 0 15.40 2 1 0 1976 1
## 3 1.32 3.538794 1 14.85 2 1 0 1976 1
## 4 1.22 1.054575 1 3.39 2 1 0 2004 1
## 6 3.50 4.122386 0 9.75 1 1 1 1976 1
## 7 6.80 3.990999 0 2.68 1 1 1 1976 1
## 8 5.00 2.331392 0 12.32 1 1 0 1982 1
## kyouyounensuu kansyu kei taisyo
## 2 40 1 800 1
## 3 40 1 250 1
## 4 12 2 200 0
## 6 40 1 1100 0
## 7 40 1 1100 1
## 8 34 1 1100 1
set.seed(123)#注1
gesui.tune <- tuneRF(gesui %>% select(-taisyo) ,# 説明変数
gesui$taisyo, # 目的変数
doBest = T) #分岐に使う変数の数(mtry)を求めるフラグ
## mtry = 3 OOB error = 27.79%
## Searching left ...
## mtry = 2 OOB error = 28.65%
## -0.03089888 0.05
## Searching right ...
## mtry = 6 OOB error = 27.17%
## 0.02247191 0.05
この結果,特徴量の個数が
3個のときに,Out-of-Bag誤差(OOB error)は7.11% 6個のときに,Out-of-Bag誤差は6.698%、 2個のときに,Out-of-Bag誤差は6.28%、 1個のときに,Out-of-Bag誤差は5.868%、
となり,特徴量の個数が3個のときにOut-of-Bag誤差が最少となり, この個数に設定するのが良さそうであることがわかる*1
構築する決定木の個数を増やしてみる ntreeTry引数はデフォルトでは50となっており,50個の決定木を構築することがわかる.500個の決定木を構築するように指定してみよう.
set.seed(123)#注1
gesui.tune <- tuneRF(gesui %>% select(-taisyo) ,# 説明変数
gesui$taisyo, # 目的変数
ntreeTry=500, #決定木数
trace = TRUE,
doBest = T)
## mtry = 3 OOB error = 26.39%
## Searching left ...
## mtry = 2 OOB error = 25.6%
## 0.0295858 0.05
## Searching right ...
## mtry = 6 OOB error = 26.15%
## 0.00887574 0.05
3個のときに,Out-of-Bag誤差(OOB error)が最大であることは変わらない
チューニングで求めたmtry(tuneRF()結果を、オブジェクトの$mtryに入っています)はこの関数の引数に代入します。
gesui.rf <- randomForest( # 予測、分類器の構築
taisyo ~ ., # モデル式
data = gesui, # データ
mtry = gesui.tune$mtry) # 分岐に使う変数の数
gesui.rf
##
## Call:
## randomForest(formula = taisyo ~ ., data = gesui, mtry = gesui.tune$mtry)
## Type of random forest: classification
## Number of trees: 500
## No. of variables tried at each split: 2
##
## OOB estimate of error rate: 25.92%
## Confusion matrix:
## 0 1 class.error
## 0 559 148 0.2093352
## 1 184 390 0.3205575
x=gesui.rf$importance
出力結果の読み方 OOB estimate of error rate:誤判別率 Confusion matrix:縦軸が予測数、横軸が実際の数。下の例では”0”(緊急度3以下)と478個予測したうち、実際に”0”だったものが450個、“1”だったものが28個と読み取れます。
#重要度順のグラフを出力
rank <- data.frame(x) # 重要度のリストをデータフレームに変換
rank$factor <- rownames(rank) # 行名になっている要因をデータフレームに追加
rank <- rank[order(rank[,1], decreasing=T),] # 重要度(偏回帰係数的なもの)順に並び替え
rownames(rank) <- 1:nrow(rank) # ランキングを行名にする
rank
## MeanDecreaseGini factor
## 1 83.128375 uedokaburi
## 2 80.471561 long
## 3 75.992068 slope
## 4 49.962255 nendo
## 5 48.638487 kyouyounensuu
## 6 45.147943 kei
## 7 32.159994 masuhonsuu
## 8 22.306871 ekijyouka
## 9 16.112339 kansyu
## 10 14.786870 did
## 11 11.065621 kouhou
## 12 8.724148 kubun
varImpPlot(gesui.rf)
plot(gesui, col=c(2, 3)[gesui$taisyo])
predition = predict(gesui.rf, test)
table(predition,test$taisyo)
##
## predition 0 1
## 0 64 16
## 1 15 47
-下水道データ読み込み# 基本統計量表示 gesui # 教科書ではlogit
gesui = read_csv("gesuidou.csv")
## Parsed with column specification:
## cols(
## OBJECTID = col_double(),
## slope = col_double(),
## long = col_double(),
## uedokaburi = col_double(),
## sitadokaburi = col_double(),
## masuhonsuu = col_double(),
## nendo = col_double(),
## kei = col_double(),
## kubun = col_double(),
## did = col_double(),
## kouhou = col_double(),
## ekijyouka = col_double(),
## kansyu = col_double(),
## kinkyuudo = col_double(),
## taisyo = col_double()
## )
gesui<- data.frame(gesui) # 教科書ではlogit
gesui$kansyu <- as.factor(gesui$kansyu)
gesui$taisyo <- as.factor(gesui$taisyo)
gesui$kubun <- as.factor(gesui$kubun)
gesui$did <- as.factor(gesui$did)
gesui$ekijyouka <- as.factor(gesui$ekijyouka)
#gesui <- gesui[-1] #OBJECTID列をデータから削除
exclude_cols = c("OBJECTID","sys_name")
gesui = gesui[ !names(gesui) %in% exclude_cols ]
set.seed(123)#注1
gesui.tune <- tuneRF(gesui %>% select(-kinkyuudo) ,# 説明変数
gesui$kinkyuudo, # 目的変数
ntreeTry=500, #決定木数
trace = TRUE,
doBest = T)
## Warning in randomForest.default(x, y, mtry = mtryStart, ntree = ntreeTry, :
## The response has five or fewer unique values. Are you sure you want to do
## regression?
## mtry = 4 OOB error = 1.480305
## Searching left ...
## Warning in randomForest.default(x, y, mtry = mtryCur, ntree = ntreeTry, :
## The response has five or fewer unique values. Are you sure you want to do
## regression?
## mtry = 2 OOB error = 1.486863
## -0.004430167 0.05
## Searching right ...
## Warning in randomForest.default(x, y, mtry = mtryCur, ntree = ntreeTry, :
## The response has five or fewer unique values. Are you sure you want to do
## regression?
## mtry = 8 OOB error = 1.519686
## -0.02660376 0.05
## Warning in randomForest.default(x, y, mtry = res[which.min(res[, 2]), 1], :
## The response has five or fewer unique values. Are you sure you want to do
## regression?
3個のときに,Out-of-Bag誤差(OOB error)が最大であることは変わらない
チューニングで求めたmtry(tuneRF()結果を、オブジェクトの$mtryに入っています)はこの関数の引数に代入します。
gesui.rf <- randomForest( # 予測、分類器の構築
kinkyuudo ~ ., # モデル式
data = gesui, # データ
mtry = gesui.tune$mtry) # 分岐に使う変数の数
## Warning in randomForest.default(m, y, ...): The response has five or fewer
## unique values. Are you sure you want to do regression?
gesui.rf
##
## Call:
## randomForest(formula = kinkyuudo ~ ., data = gesui, mtry = gesui.tune$mtry)
## Type of random forest: regression
## Number of trees: 500
## No. of variables tried at each split: 4
##
## Mean of squared residuals: 1.476213
## % Var explained: 30.78
x=gesui.rf$importance
出力結果の読み方 OOB estimate of error rate:誤判別率 Confusion matrix:縦軸が予測数、横軸が実際の数。 上の例では正解率69.04% ”0”(緊急度3以下)と218個予測したうち、実際に”0”だったものが162個、“2”だったものが2個、“3”だったものが54と読み取れます。
#重要度順のグラフを出力
rank <- data.frame(x) # 重要度のリストをデータフレームに変換
rank$factor <- rownames(rank) # 行名になっている要因をデータフレームに追加
rank <- rank[order(rank[,1], decreasing=T),] # 重要度(偏回帰係数的なもの)順に並び替え
rownames(rank) <- 1:nrow(rank) # ランキングを行名にする
rank
## IncNodePurity factor
## 1 137.118695 long
## 2 132.486026 sitadokaburi
## 3 130.789376 slope
## 4 124.661796 uedokaburi
## 5 116.354171 kansyu
## 6 69.429390 nendo
## 7 62.659995 kei
## 8 47.334756 masuhonsuu
## 9 38.187225 ekijyouka
## 10 19.239761 did
## 11 14.344117 kouhou
## 12 9.550413 taisyo
## 13 5.626238 kubun
重要度順のグラフを出力
varImpPlot(gesui.rf)
plot(gesui, col=c(2, 3, 4)[gesui$kionkyudo])
-下水道データ読み込み# 基本統計量表示 gesui # 教科書ではlogit
#gesui = read_csv("osui2.csv")
gesui = read_csv("enbi.csv")
## Parsed with column specification:
## cols(
## OBJECTID = col_double(),
## sys_name = col_double(),
## slope = col_double(),
## uedokaburi = col_double(),
## masuhonsuu = col_double(),
## long = col_double(),
## kubun = col_double(),
## did = col_double(),
## kouhou = col_double(),
## nendo = col_double(),
## ekijyouka = col_double(),
## kyouyounensuu = col_double(),
## kansyu = col_double(),
## kei = col_double(),
## kinkyuudo = col_double(),
## taisyo = col_double()
## )
gesui <- data.frame(gesui) # 教科書ではlogit
#testデータの行番号取得
#randomgesui<-sample(282,200)
#train <- gesui[randomgesui,]
#test <-gesui[-randomgesui,]
#cat(test$sys_name, file = "testrow.txt",append=FALSE)
#write.table(test,"testoutput.txt", quote=F,
# col.names=T, append=T)
gesui <- gesui[-1:-2] #OBJECTID,sys_name列をデータから削除
gesui <- gesui[-13]
gesui <- gesui[-8]
gesui <- gesui[-10]
stargazer(as.data.frame(gesui),type = "html")
Statistic | N | Mean | St. Dev. | Min | Pctl(25) | Pctl(75) | Max |
slope | 282 | 3.309 | 2.017 | 0.000 | 1.900 | 4.100 | 9.900 |
uedokaburi | 282 | 4.218 | 2.570 | 1.009 | 2.462 | 5.397 | 13.385 |
masuhonsuu | 282 | 1.284 | 1.765 | 0 | 0 | 2 | 11 |
long | 282 | 31.300 | 15.309 | 0.970 | 21.325 | 40.492 | 96.820 |
kubun | 282 | 1.209 | 0.407 | 1 | 1 | 1 | 2 |
did | 282 | 0.766 | 0.424 | 0 | 1 | 1 | 1 |
kouhou | 282 | 0.337 | 0.473 | 0 | 0 | 1 | 1 |
ekijyouka | 282 | 0.202 | 0.402 | 0 | 0 | 0 | 1 |
kyouyounensuu | 282 | 27.514 | 5.204 | 10 | 25 | 27 | 40 |
kei | 282 | 390.248 | 162.287 | 200 | 250 | 600 | 900 |
taisyo | 282 | 0.312 | 0.464 | 0 | 0 | 1 | 1 |
gesui$taisyo <- as.factor(gesui$taisyo)
#gesui$kansyu <- as.factor(gesui$kansyu)
gesui$kubun <- as.factor(gesui$kubun)
gesui$did <- as.factor(gesui$did)
gesui$ekijyouka <- as.factor(gesui$ekijyouka)
#gesui$kinkyuudo <- as.factor(gesui$kinkyuudo)
sapply(gesui, class)
## slope uedokaburi masuhonsuu long kubun
## "numeric" "numeric" "numeric" "numeric" "factor"
## did kouhou ekijyouka kyouyounensuu kei
## "factor" "numeric" "factor" "numeric" "numeric"
## taisyo
## "factor"
summary(gesui)
## slope uedokaburi masuhonsuu long kubun
## Min. :0.000 Min. : 1.009 Min. : 0.000 Min. : 0.97 1:223
## 1st Qu.:1.900 1st Qu.: 2.462 1st Qu.: 0.000 1st Qu.:21.32 2: 59
## Median :2.685 Median : 3.402 Median : 1.000 Median :30.06
## Mean :3.309 Mean : 4.218 Mean : 1.284 Mean :31.30
## 3rd Qu.:4.100 3rd Qu.: 5.397 3rd Qu.: 2.000 3rd Qu.:40.49
## Max. :9.900 Max. :13.385 Max. :11.000 Max. :96.82
## did kouhou ekijyouka kyouyounensuu kei taisyo
## 0: 66 Min. :0.0000 0:225 Min. :10.00 Min. :200.0 0:194
## 1:216 1st Qu.:0.0000 1: 57 1st Qu.:25.00 1st Qu.:250.0 1: 88
## Median :0.0000 Median :25.00 Median :250.0
## Mean :0.3369 Mean :27.51 Mean :390.2
## 3rd Qu.:1.0000 3rd Qu.:27.00 3rd Qu.:600.0
## Max. :1.0000 Max. :40.00 Max. :900.0
randomgesui<-sample(282,200)
train <- gesui[randomgesui,]
test <-gesui[-randomgesui,]
gesui <- train
#3/26 学習データ数を254、テストデータ数を28に変更
data <- data.frame(gesui) # 教科書ではlogit
data = read_csv("enbi.csv")
## Parsed with column specification:
## cols(
## OBJECTID = col_double(),
## sys_name = col_double(),
## slope = col_double(),
## uedokaburi = col_double(),
## masuhonsuu = col_double(),
## long = col_double(),
## kubun = col_double(),
## did = col_double(),
## kouhou = col_double(),
## nendo = col_double(),
## ekijyouka = col_double(),
## kyouyounensuu = col_double(),
## kansyu = col_double(),
## kei = col_double(),
## kinkyuudo = col_double(),
## taisyo = col_double()
## )
data <- data.frame(data)
set.seed(20180924)
sub <- sample(1:282, 254)
gesui <- data[sub,]
gesui
## OBJECTID sys_name slope uedokaburi masuhonsuu long kubun did kouhou nendo
## 223 1363 223 1.340 3.484858 4 20.20 1 1 0 1991
## 15 151 15 1.740 1.436539 3 9.25 2 1 0 1977
## 180 1237 180 2.130 2.660371 2 29.31 1 1 0 1976
## 214 1354 214 2.320 4.574894 1 30.18 2 1 0 1976
## 57 338 57 3.900 2.796943 2 30.01 2 1 0 1979
## 192 1256 192 2.300 2.346045 2 29.68 1 1 0 1976
## 63 344 63 4.100 1.694001 1 31.01 2 1 0 1979
## 132 979 132 1.100 10.907328 0 22.06 1 1 1 1991
## 149 1087 149 3.200 3.916999 0 40.78 1 0 1 1991
## 215 1355 215 5.600 2.498483 5 26.90 1 1 0 1991
## 267 1578 267 2.450 2.518753 2 26.93 1 1 0 1991
## 245 1458 245 1.010 3.850776 1 24.86 1 1 0 1992
## 168 1212 168 3.300 10.594034 0 30.59 1 1 1 1991
## 30 230 30 5.000 6.649337 0 14.13 1 1 1 1990
## 47 284 47 1.610 9.463612 1 15.54 1 1 0 1991
## 32 232 32 1.580 4.386176 0 5.96 1 1 0 1999
## 97 777 97 3.600 5.855999 1 55.42 1 0 0 1989
## 225 1365 225 1.990 2.754591 3 34.95 2 1 0 1991
## 130 977 130 1.000 6.382495 0 31.11 1 1 1 1990
## 178 1222 178 2.100 5.403999 0 54.79 1 0 1 1991
## 22 193 22 1.500 3.253370 0 13.00 1 0 0 1991
## 92 770 92 1.400 6.330505 0 19.13 1 1 1 1989
## 176 1220 176 3.000 6.259277 0 58.39 1 0 1 1991
## 107 891 107 3.300 4.910003 2 65.07 1 1 1 1989
## 79 447 79 1.820 1.366309 0 23.59 2 1 0 1992
## 139 1077 139 2.100 4.224999 0 50.09 1 0 1 1991
## 212 1352 212 2.120 2.557118 5 32.00 1 1 0 1976
## 18 189 18 2.200 6.027199 0 10.12 1 0 1 1992
## 241 1441 241 3.300 2.621705 3 33.74 1 1 0 1991
## 200 1328 200 3.400 5.916304 0 63.02 1 1 1 1990
## 64 345 64 2.500 1.617001 3 24.86 2 1 0 1979
## 20 191 20 5.000 3.847243 1 15.15 1 0 0 1991
## 183 1240 183 2.040 2.161000 0 29.91 1 1 0 1976
## 16 152 16 4.270 1.472376 0 7.86 2 1 0 1977
## 99 779 99 2.100 3.525528 1 63.57 1 0 0 1989
## 34 243 34 6.500 4.590000 0 9.95 1 0 1 1982
## 156 1103 156 1.800 3.661790 2 28.05 1 0 1 1988
## 113 960 113 4.900 3.828565 11 51.83 2 1 0 1992
## 125 972 125 2.100 10.516000 0 42.60 1 1 1 1991
## 274 1590 274 2.670 2.685897 0 27.49 1 1 0 1991
## 169 1213 169 4.600 10.591164 1 26.39 1 0 1 1991
## 144 1082 144 4.100 3.810922 0 48.17 1 0 1 1991
## 106 874 106 1.900 3.748734 0 42.66 2 0 0 1989
## 211 1351 211 1.480 1.864272 0 20.00 1 1 0 1976
## 166 1207 166 6.900 2.981946 0 34.58 1 1 0 1990
## 161 1201 161 2.800 2.990898 0 54.94 1 0 0 2006
## 82 565 82 4.500 6.974002 0 96.82 1 1 1 1989
## 184 1241 184 2.030 2.351002 0 30.00 1 1 0 1990
## 242 1442 242 6.800 4.806221 0 29.86 1 1 0 1991
## 114 961 114 3.500 3.400039 10 52.24 2 1 0 1992
## 69 350 69 5.500 1.486400 3 26.99 2 1 0 1979
## 121 968 121 1.700 12.168823 0 30.58 1 1 1 1991
## 19 190 19 5.000 6.957029 1 15.65 1 0 1 1991
## 185 1242 185 1.730 1.838435 1 27.80 1 1 0 1990
## 278 1594 278 3.710 2.891690 3 27.74 1 1 0 1991
## 1 6 1 1.220 1.054575 1 3.39 2 1 0 2004
## 252 1550 252 3.190 9.491322 4 19.56 1 1 0 1992
## 170 1214 170 3.200 8.921724 1 52.98 1 1 1 1991
## 164 1204 164 4.100 3.150200 1 50.89 1 0 0 1990
## 126 973 126 2.500 9.882314 0 41.64 1 1 1 1991
## 240 1440 240 1.250 2.002232 6 26.20 1 1 0 1991
## 153 1091 153 2.100 4.563110 0 38.55 1 0 1 1991
## 205 1335 205 4.060 1.982026 4 25.49 1 1 0 1990
## 46 282 46 7.720 9.556511 5 11.01 1 1 0 1991
## 150 1088 150 2.700 3.925999 0 41.25 1 0 1 1991
## 210 1349 210 9.700 2.816253 3 31.90 1 1 0 1991
## 115 962 115 4.120 4.084854 7 51.90 2 1 1 1992
## 135 982 135 1.000 4.740499 0 24.86 1 1 1 1990
## 108 895 108 9.600 2.481001 3 39.08 2 1 0 1990
## 181 1238 181 2.300 1.660551 1 19.08 1 1 0 1976
## 254 1552 254 2.590 9.356372 0 19.97 1 0 0 1991
## 21 192 21 1.500 2.211840 0 14.47 1 0 0 1991
## 231 1421 231 5.400 10.806235 0 40.03 1 0 1 1982
## 155 1102 155 2.400 4.211381 4 59.97 1 0 1 1988
## 187 1244 187 1.880 2.855000 2 30.04 1 1 0 1990
## 91 769 91 1.000 6.574005 1 22.79 1 1 1 1989
## 197 1261 197 2.420 4.470321 1 29.94 2 1 0 1976
## 40 251 40 4.100 2.749357 0 10.02 1 1 0 1991
## 140 1078 140 1.900 4.243001 0 50.01 1 0 1 1991
## 162 1202 162 3.200 2.937999 0 36.44 1 0 0 1990
## 86 763 86 1.400 7.042785 1 40.03 1 1 1 1989
## 190 1253 190 9.200 2.236709 3 33.61 1 1 0 1990
## 206 1336 206 3.230 1.742547 3 30.95 1 1 0 1990
## 202 1332 202 4.020 2.240832 2 31.20 1 1 0 1990
## 237 1435 237 4.210 2.289566 1 23.22 2 1 0 1991
## 85 762 85 1.100 4.548029 0 42.85 1 0 1 1989
## 41 252 41 1.760 2.432278 0 7.87 1 1 0 1991
## 186 1243 186 1.820 2.143259 2 29.91 1 1 0 1990
## 148 1086 148 4.200 4.861001 0 40.29 1 0 1 1991
## 272 1588 272 1.960 2.694672 3 27.25 1 1 0 1991
## 71 429 71 1.060 2.725467 1 29.09 2 1 0 1990
## 110 915 110 3.500 3.403083 4 37.96 2 1 0 1995
## 281 1597 281 4.400 4.937003 0 39.80 1 1 1 1991
## 261 1572 261 3.210 3.068203 6 46.50 1 1 0 1991
## 61 342 61 2.400 1.956000 2 25.15 2 1 0 1979
## 276 1592 276 4.800 4.976580 0 55.17 1 1 1 1991
## 136 983 136 1.100 6.537330 0 47.24 1 1 1 1990
## 262 1573 262 4.300 6.827301 0 51.84 1 1 1 1991
## 188 1245 188 1.790 2.212618 2 30.12 1 1 0 1990
## 93 771 93 5.000 3.920159 1 32.19 1 1 0 1989
## 266 1577 266 2.280 2.663494 2 27.46 1 1 0 1991
## 165 1206 165 3.100 3.199907 0 44.39 1 0 0 1990
## 228 1394 228 2.590 2.475453 0 32.61 1 1 0 1976
## 56 335 56 5.700 3.024835 3 36.10 2 1 0 1976
## 233 1427 233 2.060 4.124568 0 77.03 1 1 1 1991
## 250 1546 250 1.710 9.652271 2 18.98 1 1 0 1991
## 255 1553 255 2.600 9.327517 1 28.02 1 0 0 1991
## 26 198 26 1.690 1.824348 0 10.00 1 1 0 1990
## 173 1217 173 3.000 5.177794 1 18.24 1 0 0 1991
## 277 1593 277 3.670 2.667007 0 27.45 1 1 0 1991
## 44 255 44 2.440 3.414251 0 14.07 2 1 0 1992
## 201 1331 201 3.900 2.064381 2 29.98 1 1 0 1990
## 247 1463 247 4.100 6.122001 0 46.81 1 1 1 1991
## 217 1357 217 8.000 2.589767 3 19.90 1 1 0 1991
## 249 1545 249 3.130 9.776862 2 19.94 1 1 0 1991
## 204 1334 204 4.070 1.891763 1 24.00 1 1 0 1990
## 172 1216 172 3.300 5.519844 1 21.85 1 0 1 1991
## 251 1547 251 1.630 9.552125 2 29.99 1 1 0 1991
## 29 225 29 1.895 3.646695 0 2.71 1 1 0 1989
## 14 146 14 1.300 6.858791 0 2.79 1 1 1 1991
## 159 1199 159 2.900 2.635552 1 48.85 1 0 0 1990
## 65 346 65 3.600 1.572562 1 24.52 2 1 0 1979
## 3 19 3 4.710 1.414000 0 5.02 2 1 0 1992
## 163 1203 163 3.800 2.868704 1 37.59 1 0 0 1990
## 53 293 53 3.580 2.603039 1 16.04 1 1 0 1991
## 112 942 112 2.500 3.922952 3 40.56 1 1 1 1984
## 67 348 67 4.300 1.739730 0 24.93 2 1 0 1979
## 49 289 49 2.730 2.860160 1 7.52 1 1 0 1991
## 220 1360 220 1.600 2.942733 3 31.80 1 1 0 1991
## 208 1344 208 4.010 1.798224 2 29.92 1 1 0 1990
## 128 975 128 3.300 7.612320 0 38.28 1 1 1 1990
## 9 131 9 1.300 1.819819 1 9.12 1 0 0 1989
## 157 1197 157 6.100 3.115186 0 43.01 1 0 0 1990
## 6 23 6 8.900 1.738222 0 15.72 2 1 0 1992
## 101 781 101 5.500 3.339510 0 36.44 1 0 0 1989
## 58 339 58 4.400 2.611828 3 26.75 2 1 0 1979
## 244 1457 244 1.030 5.408001 0 41.07 1 1 1 1992
## 138 1076 138 1.800 4.652454 0 50.20 1 0 1 1991
## 119 966 119 2.300 10.198399 3 45.18 1 1 1 1991
## 131 978 131 8.700 1.926552 4 31.86 2 1 0 1990
## 105 873 105 9.500 1.942961 0 39.40 1 1 0 1989
## 39 250 39 3.900 2.736001 1 12.99 1 1 0 1991
## 117 964 117 2.400 7.072001 2 51.09 1 1 1 1991
## 52 292 52 3.280 3.394147 1 15.78 1 1 0 1991
## 189 1252 189 2.630 1.843763 2 27.15 1 1 0 1976
## 265 1576 265 2.280 2.499810 2 27.46 1 1 0 1991
## 37 248 37 8.700 2.607048 1 8.70 1 1 0 1991
## 207 1337 207 3.830 1.909999 2 28.60 1 1 0 1989
## 68 349 68 4.900 1.586905 2 24.91 2 1 0 1979
## 263 1574 263 5.400 5.521916 0 54.91 1 1 1 1991
## 66 347 66 6.200 1.243001 4 46.92 2 1 0 1988
## 103 785 103 2.600 5.532493 0 31.99 1 0 1 1989
## 258 1560 258 2.960 3.700743 3 26.81 1 1 0 1991
## 23 194 23 1.400 3.677926 1 13.85 1 0 0 1991
## 84 761 84 1.200 4.675212 0 45.45 1 0 1 1989
## 142 1080 142 3.400 4.247999 0 52.10 1 0 1 1991
## 62 343 62 4.500 1.664466 1 26.14 2 1 0 1979
## 280 1596 280 3.710 1.889743 4 31.13 1 1 0 1991
## 43 254 43 3.380 3.460195 1 12.04 2 1 0 1991
## 243 1443 243 5.300 5.008060 0 21.81 1 1 0 1991
## 28 201 28 1.720 2.119883 0 17.00 1 1 0 1976
## 4 20 4 1.100 1.544714 3 13.17 2 1 0 1992
## 268 1579 268 2.900 7.568516 0 54.50 1 1 1 1991
## 54 294 54 1.630 2.476748 4 45.65 1 1 0 1991
## 88 766 88 3.000 6.631325 0 45.98 1 1 1 1989
## 174 1218 174 3.400 2.611999 2 25.26 1 0 0 1991
## 198 1262 198 2.370 3.777865 0 29.87 2 1 0 1976
## 279 1595 279 2.380 4.197623 5 40.25 1 1 0 1991
## 143 1081 143 2.300 4.438878 0 46.81 1 0 1 1991
## 175 1219 175 4.000 3.250242 0 17.93 1 0 0 1991
## 78 446 78 2.120 1.008538 2 30.99 2 1 0 1992
## 182 1239 182 2.140 1.856014 1 29.30 1 1 0 1976
## 74 432 74 4.600 2.853477 0 39.85 2 1 0 1990
## 118 965 118 7.000 7.090262 0 31.00 1 1 1 1991
## 83 679 83 3.000 5.669425 9 40.00 1 1 1 1983
## 222 1362 222 7.800 7.650076 1 21.90 1 1 0 1991
## 196 1260 196 2.940 2.054423 2 28.86 1 1 0 1976
## 218 1358 218 9.900 2.596578 4 32.30 1 1 0 1991
## 273 1589 273 4.700 6.140733 1 54.96 1 1 1 1991
## 264 1575 264 1.990 2.896992 2 27.42 1 1 0 1991
## 271 1582 271 3.350 2.644891 2 36.92 1 1 0 1991
## 100 780 100 4.700 3.494682 1 33.71 1 0 0 1989
## 160 1200 160 1.100 2.491678 1 54.95 1 0 0 1990
## 224 1364 224 2.500 3.318911 4 40.96 1 1 0 1991
## 253 1551 253 1.660 9.424565 4 31.98 1 1 0 1991
## 2 7 2 2.500 1.533001 0 7.78 2 1 0 1988
## 221 1361 221 1.400 7.191373 1 32.10 1 1 0 1991
## 145 1083 145 2.800 5.456998 0 21.43 1 0 1 1991
## 151 1089 151 1.400 4.105612 0 37.92 1 0 1 1991
## 124 971 124 2.400 11.925139 0 34.16 1 1 1 1991
## 77 445 77 1.220 1.225071 0 25.56 2 1 0 1992
## 256 1554 256 3.990 1.993982 1 28.02 1 0 0 1991
## 96 776 96 3.300 3.213998 1 29.89 1 1 0 1989
## 11 141 11 1.300 3.583047 0 10.86 2 1 0 1976
## 232 1422 232 5.200 11.221190 0 44.00 1 0 1 1982
## 73 431 73 4.500 2.714951 0 39.87 2 1 0 1990
## 50 290 50 4.190 2.680456 0 3.50 1 1 0 1991
## 171 1215 171 2.100 8.576775 0 52.11 1 1 1 1991
## 33 233 33 1.428 2.864370 0 7.68 1 1 0 1991
## 90 768 90 3.700 6.981265 0 26.86 1 1 1 1989
## 35 244 35 3.030 6.923674 0 9.21 1 0 1 1982
## 116 963 116 2.700 7.091002 1 41.88 1 1 1 1991
## 75 443 75 1.200 1.428993 0 27.22 2 1 0 1992
## 236 1434 236 3.690 4.178999 2 24.00 1 1 0 1991
## 38 249 38 3.620 3.316654 0 11.03 1 1 0 1991
## 70 351 70 6.300 1.578893 1 20.45 2 1 0 1979
## 104 786 104 2.560 5.077001 0 61.47 1 0 1 1989
## 55 295 55 1.787 2.453790 0 6.24 1 1 0 1991
## 282 1598 282 4.500 5.842998 0 39.55 1 1 1 1991
## 167 1211 167 2.600 8.277668 0 28.97 1 1 1 1991
## 227 1367 227 6.250 2.031941 3 19.73 2 1 0 1991
## 102 784 102 2.300 5.376960 0 39.12 1 0 1 1989
## 10 140 10 1.240 3.505754 0 9.42 2 1 0 1976
## 270 1581 270 1.940 2.559030 1 28.36 1 0 0 1991
## 146 1084 146 2.600 5.497247 0 49.16 1 0 1 1991
## 179 1229 179 7.070 3.457646 1 30.09 1 1 0 1989
## 158 1198 158 4.200 2.784306 0 42.89 1 0 0 1990
## 234 1432 234 2.010 2.395267 1 38.81 1 1 0 1991
## 111 916 111 2.400 3.824999 0 38.06 2 1 0 1995
## 72 430 72 6.700 2.457513 0 28.08 2 1 0 1990
## 89 767 89 1.900 6.749645 0 50.18 1 1 1 1989
## 248 1464 248 2.000 6.358178 0 49.37 1 1 1 1992
## 239 1439 239 8.000 2.291207 0 19.86 1 1 0 1991
## 120 967 120 1.600 11.327685 1 52.05 1 1 1 1991
## 13 143 13 3.200 3.852999 1 9.97 1 1 1 1984
## 24 195 24 7.000 3.632331 0 9.98 1 1 0 1989
## 80 448 80 1.030 1.657000 2 31.00 2 1 0 1992
## 87 764 87 1.800 4.378568 2 53.43 1 1 1 1989
## 60 341 60 4.200 2.061075 1 31.17 2 1 0 1979
## 25 197 25 0.000 1.686499 0 0.97 1 0 0 1985
## 219 1359 219 1.170 2.312506 7 31.70 1 1 0 1991
## 48 288 48 3.770 3.731845 1 8.54 1 1 0 1991
## 76 444 76 7.300 1.251538 0 24.68 2 1 0 1992
## 7 60 7 9.100 2.981000 0 14.26 1 1 0 1982
## 230 1420 230 5.400 10.298999 0 56.50 1 0 1 1982
## 152 1090 152 2.900 3.830998 0 39.16 1 0 1 1991
## 51 291 51 1.170 4.029050 1 10.18 2 1 0 1991
## 199 1325 199 1.400 4.845603 0 77.66 1 1 1 1990
## 194 1258 194 1.700 2.277687 2 30.08 1 1 0 1976
## 229 1395 229 2.590 2.813741 0 32.84 1 1 0 1976
## 275 1591 275 2.560 3.020516 0 27.48 1 1 0 1991
## 45 267 45 1.550 13.384647 3 8.50 1 1 0 1991
## 94 773 94 1.900 4.610782 3 48.88 1 0 1 1989
## 147 1085 147 2.000 5.643243 0 46.96 1 0 1 1991
## 226 1366 226 6.170 2.612946 3 49.95 2 1 0 1991
## 8 61 8 6.300 3.104421 3 15.91 1 1 0 1982
## 81 449 81 9.400 1.665000 1 21.29 2 1 0 1992
## 141 1079 141 2.100 4.467487 0 52.20 1 0 1 1991
## 177 1221 177 2.000 5.180002 0 54.80 1 0 1 1991
## 133 980 133 1.100 5.270930 0 42.22 1 1 1 1990
## 98 778 98 2.800 3.939002 0 55.84 1 0 0 1989
## 12 142 12 0.000 2.633001 0 5.00 2 1 0 1984
## 42 253 42 2.650 2.806566 0 10.02 2 1 0 1992
## 31 231 31 9.400 5.956002 0 5.65 1 1 1 1990
## ekijyouka kyouyounensuu kansyu kei kinkyuudo taisyo
## 223 0 25 2 250 0 0
## 15 1 39 2 250 0 0
## 180 0 40 2 250 3 1
## 214 0 40 2 250 3 1
## 57 1 37 2 250 3 1
## 192 0 40 2 250 3 1
## 63 0 37 2 250 3 1
## 132 0 25 2 600 0 0
## 149 0 25 2 600 0 0
## 215 0 25 2 250 3 1
## 267 0 25 2 250 0 0
## 245 1 24 2 300 0 0
## 168 0 25 2 600 0 0
## 30 0 26 2 400 0 0
## 47 0 25 2 250 0 0
## 32 0 17 2 400 0 0
## 97 0 27 2 600 2 1
## 225 0 25 2 250 0 0
## 130 1 26 2 600 0 0
## 178 0 25 2 600 0 0
## 22 0 25 2 600 0 0
## 92 0 27 2 600 3 1
## 176 0 25 2 600 3 1
## 107 0 27 2 350 0 0
## 79 1 24 2 250 0 0
## 139 0 25 2 600 0 0
## 212 0 40 2 250 3 1
## 18 0 24 2 600 0 0
## 241 0 25 2 250 3 1
## 200 1 26 2 600 3 1
## 64 0 37 2 250 3 1
## 20 0 25 2 600 0 0
## 183 0 40 2 250 3 1
## 16 1 39 2 250 0 0
## 99 0 27 2 600 0 0
## 34 0 34 2 300 0 0
## 156 1 28 2 500 0 0
## 113 0 24 2 250 0 0
## 125 0 25 2 600 0 0
## 274 0 25 2 250 0 0
## 169 0 25 2 600 3 1
## 144 0 25 2 600 3 1
## 106 0 27 2 300 0 0
## 211 0 40 2 250 3 1
## 166 0 26 2 600 0 0
## 161 0 10 2 600 0 0
## 82 1 27 2 400 0 0
## 184 0 26 2 250 3 1
## 242 0 25 2 250 0 0
## 114 0 24 2 250 0 0
## 69 1 37 2 250 0 0
## 121 0 25 2 600 3 1
## 19 0 25 2 600 3 1
## 185 0 26 2 250 3 1
## 278 0 25 2 250 0 0
## 1 1 12 2 200 0 0
## 252 0 24 2 250 0 0
## 170 0 25 2 600 0 0
## 164 0 26 2 600 0 0
## 126 0 25 2 600 0 0
## 240 0 25 2 250 0 0
## 153 0 25 2 600 0 0
## 205 0 26 2 250 3 1
## 46 0 25 2 250 0 0
## 150 0 25 2 600 0 0
## 210 0 25 2 250 3 1
## 115 0 24 2 250 0 0
## 135 1 26 2 600 0 0
## 108 0 26 2 250 0 0
## 181 0 40 2 250 3 1
## 254 0 25 2 250 0 0
## 21 0 25 2 600 0 0
## 231 0 34 2 350 0 0
## 155 1 28 2 500 3 1
## 187 1 26 2 250 3 1
## 91 0 27 2 600 0 0
## 197 0 40 2 250 3 1
## 40 0 25 2 250 0 0
## 140 0 25 2 600 0 0
## 162 0 26 2 600 0 0
## 86 1 27 2 600 0 0
## 190 0 26 2 250 3 1
## 206 0 26 2 250 3 1
## 202 0 26 2 250 3 1
## 237 0 25 2 250 0 0
## 85 0 27 2 600 3 1
## 41 0 25 2 250 0 0
## 186 1 26 2 250 0 0
## 148 0 25 2 600 0 0
## 272 0 25 2 250 0 0
## 71 0 26 2 250 0 0
## 110 0 21 2 250 0 0
## 281 0 25 2 450 0 0
## 261 0 25 2 450 0 0
## 61 1 37 2 250 3 1
## 276 0 25 2 450 0 0
## 136 1 26 2 600 0 0
## 262 0 25 2 450 0 0
## 188 1 26 2 250 3 1
## 93 1 27 2 600 0 0
## 266 0 25 2 250 0 0
## 165 0 26 2 600 0 0
## 228 0 40 2 450 0 0
## 56 1 40 2 250 3 1
## 233 0 25 2 500 0 0
## 250 0 25 2 250 0 0
## 255 0 25 2 250 0 0
## 26 0 26 2 250 3 1
## 173 0 25 2 600 0 0
## 277 0 25 2 250 0 0
## 44 0 24 2 250 0 0
## 201 0 26 2 250 3 1
## 247 1 25 2 450 0 0
## 217 0 25 2 250 3 1
## 249 0 25 2 250 0 0
## 204 0 26 2 250 3 1
## 172 0 25 2 600 0 0
## 251 0 25 2 250 0 0
## 29 0 27 2 250 0 0
## 14 0 25 2 600 0 0
## 159 0 26 2 600 0 0
## 65 0 37 2 250 2 1
## 3 1 24 2 250 0 0
## 163 0 26 2 600 0 0
## 53 0 25 2 450 0 0
## 112 0 32 2 700 0 0
## 67 0 37 2 250 3 1
## 49 0 25 2 300 0 0
## 220 0 25 2 250 3 1
## 208 0 26 2 250 3 1
## 128 0 26 2 600 0 0
## 9 0 27 2 250 0 0
## 157 0 26 2 600 0 0
## 6 1 24 2 250 0 0
## 101 0 27 2 600 3 1
## 58 1 37 2 250 3 1
## 244 1 24 2 300 0 0
## 138 0 25 2 600 3 1
## 119 0 25 2 600 0 0
## 131 1 26 2 250 0 0
## 105 0 27 2 250 0 0
## 39 0 25 2 250 0 0
## 117 0 25 2 600 3 1
## 52 0 25 2 450 0 0
## 189 0 40 2 250 3 1
## 265 0 25 2 250 0 0
## 37 0 25 2 250 0 0
## 207 0 27 2 250 3 1
## 68 0 37 2 250 3 1
## 263 0 25 2 450 0 0
## 66 1 28 2 250 0 0
## 103 0 27 2 600 0 0
## 258 0 25 2 300 0 0
## 23 0 25 2 600 0 0
## 84 0 27 2 600 0 0
## 142 0 25 2 600 3 1
## 62 0 37 2 250 3 1
## 280 0 25 2 450 3 1
## 43 0 25 2 250 0 0
## 243 0 25 2 250 0 0
## 28 0 40 2 250 3 1
## 4 1 24 2 250 0 0
## 268 0 25 2 450 0 0
## 54 0 25 2 250 0 0
## 88 1 27 2 600 0 0
## 174 0 25 2 600 0 0
## 198 0 40 2 250 3 1
## 279 0 25 2 450 0 0
## 143 0 25 2 600 3 1
## 175 0 25 2 600 0 0
## 78 1 24 2 250 0 0
## 182 0 40 2 250 3 1
## 74 1 26 2 250 0 0
## 118 0 25 2 600 3 1
## 83 0 33 2 900 0 0
## 222 0 25 2 250 2 1
## 196 0 40 2 250 3 1
## 218 0 25 2 250 3 1
## 273 0 25 2 450 0 0
## 264 0 25 2 250 0 0
## 271 0 25 2 450 0 0
## 100 0 27 2 600 3 1
## 160 0 26 2 600 0 0
## 224 0 25 2 250 3 1
## 253 0 25 2 250 0 0
## 2 0 28 2 250 2 1
## 221 0 25 2 250 3 1
## 145 0 25 2 600 0 0
## 151 0 25 2 600 0 0
## 124 0 25 2 600 0 0
## 77 1 24 2 250 0 0
## 256 0 25 2 250 0 0
## 96 0 27 2 600 0 0
## 11 0 40 2 250 0 0
## 232 0 34 2 350 0 0
## 73 1 26 2 250 0 0
## 50 0 25 2 300 0 0
## 171 0 25 2 600 0 0
## 33 0 25 2 400 0 0
## 90 0 27 2 600 0 0
## 35 0 34 2 350 0 0
## 116 0 25 2 600 0 0
## 75 1 24 2 250 0 0
## 236 0 25 2 250 0 0
## 38 0 25 2 250 0 0
## 70 1 37 2 250 0 0
## 104 0 27 2 350 0 0
## 55 0 25 2 450 0 0
## 282 0 25 2 450 0 0
## 167 0 25 2 600 3 1
## 227 0 25 2 250 0 0
## 102 0 27 2 600 3 1
## 10 0 40 2 250 0 0
## 270 0 25 2 250 0 0
## 146 0 25 2 600 3 1
## 179 1 27 2 300 0 0
## 158 0 26 2 600 0 0
## 234 0 25 2 250 0 0
## 111 0 21 2 250 0 0
## 72 1 26 2 250 3 1
## 89 1 27 2 600 0 0
## 248 1 24 2 500 0 0
## 239 0 25 2 250 0 0
## 120 0 25 2 600 0 0
## 13 0 32 2 700 0 0
## 24 1 27 2 300 0 0
## 80 1 24 2 250 0 0
## 87 1 27 2 600 0 0
## 60 1 37 2 250 3 1
## 25 1 31 2 250 0 0
## 219 0 25 2 250 3 1
## 48 0 25 2 300 0 0
## 76 1 24 2 250 0 0
## 7 0 34 2 250 3 1
## 230 0 34 2 350 0 0
## 152 0 25 2 600 0 0
## 51 0 25 2 250 0 0
## 199 1 26 2 600 3 1
## 194 0 40 2 250 3 1
## 229 0 40 2 450 0 0
## 275 0 25 2 250 0 0
## 45 0 25 2 250 0 0
## 94 1 27 2 600 0 0
## 147 0 25 2 600 3 1
## 226 0 25 2 250 0 0
## 8 0 34 2 250 3 1
## 81 1 24 2 250 0 0
## 141 0 25 2 600 0 0
## 177 0 25 2 600 0 0
## 133 1 26 2 600 0 0
## 98 0 27 2 600 3 1
## 12 0 32 2 250 0 0
## 42 0 24 2 200 0 0
## 31 0 26 2 400 0 0
test <-data[-sub,]
test
## OBJECTID sys_name slope uedokaburi masuhonsuu long kubun did kouhou nendo
## 5 22 5 1.80 4.412133 1 5.56 2 1 1 1992
## 17 167 17 4.80 5.841930 2 15.45 1 1 1 1991
## 27 200 27 2.12 2.417561 0 9.15 2 1 0 1976
## 36 247 36 2.26 2.336273 0 4.17 1 1 0 1991
## 59 340 59 4.50 2.244000 2 18.98 2 1 0 1979
## 95 775 95 2.10 3.617295 2 31.17 1 1 0 1989
## 109 896 109 3.50 3.160007 1 38.11 2 1 0 1990
## 122 969 122 1.80 12.289001 0 50.19 1 1 1 1991
## 123 970 123 2.40 12.201411 0 35.48 1 1 1 1991
## 127 974 127 1.80 8.155643 0 33.74 1 1 1 1990
## 129 976 129 2.50 5.257328 0 31.86 1 1 1 1990
## 134 981 134 3.90 5.204206 0 34.70 1 1 1 1990
## 137 1004 137 2.56 1.319381 5 29.01 2 1 0 1977
## 154 1101 154 1.90 4.014523 2 57.24 1 0 1 1988
## 191 1255 191 2.37 2.624365 2 29.00 1 1 0 1976
## 193 1257 193 2.05 2.293845 2 29.90 1 1 0 1976
## 195 1259 195 1.75 1.938404 2 32.00 1 1 0 1976
## 203 1333 203 3.73 2.509508 1 31.21 1 1 0 1990
## 209 1348 209 9.40 6.640145 0 32.30 1 1 1 1990
## 213 1353 213 2.06 2.240783 4 32.00 1 1 0 1976
## 216 1356 216 8.90 2.795862 4 25.90 1 1 0 1991
## 235 1433 235 1.11 2.970002 4 25.13 1 1 0 1991
## 238 1438 238 1.59 2.613002 5 33.09 1 1 0 1991
## 246 1462 246 4.10 5.822236 0 46.30 1 1 1 1991
## 257 1555 257 1.45 2.372000 6 33.94 1 0 0 1991
## 259 1561 259 3.07 4.151997 0 20.79 1 1 0 1991
## 260 1571 260 3.06 2.691162 4 32.29 1 1 0 1991
## 269 1580 269 3.00 7.043107 0 55.48 1 1 1 1991
## ekijyouka kyouyounensuu kansyu kei kinkyuudo taisyo
## 5 1 24 2 250 3 1
## 17 0 25 2 600 3 1
## 27 0 40 2 250 3 1
## 36 0 25 2 250 0 0
## 59 1 37 2 250 3 1
## 95 0 27 2 600 3 1
## 109 0 26 2 250 0 0
## 122 0 25 2 600 0 0
## 123 0 25 2 600 0 0
## 127 0 26 2 600 0 0
## 129 1 26 2 600 3 1
## 134 1 26 2 600 3 1
## 137 1 39 2 250 0 0
## 154 1 28 2 500 0 0
## 191 0 40 2 250 0 0
## 193 0 40 2 250 2 1
## 195 0 40 2 250 3 1
## 203 0 26 2 250 3 1
## 209 0 26 2 400 0 0
## 213 0 40 2 250 3 1
## 216 0 25 2 250 3 1
## 235 0 25 2 250 0 0
## 238 0 25 2 250 0 0
## 246 1 25 2 450 0 0
## 257 0 25 2 250 0 0
## 259 0 25 2 300 0 0
## 260 0 25 2 450 0 0
## 269 0 25 2 450 0 0
gesui <- gesui[-1:-2] #OBJECTID,sys_name列をデータから削除
gesui <- gesui[-13]
gesui <- gesui[-8]
gesui <- gesui[-10]
test <- test[-1:-2] #OBJECTID,sys_name列をデータから削除
test <- test[-13]
test <- test[-8]
test <- test[-10]
gesui$taisyo <- as.factor(gesui$taisyo)
gesui$kubun <- as.factor(gesui$kubun)
gesui$did <- as.factor(gesui$did)
gesui$ekijyouka <- as.factor(gesui$ekijyouka)
test$taisyo <- as.factor(test$taisyo)
test$kubun <- as.factor(test$kubun)
test$did <- as.factor(test$did)
test$ekijyouka <- as.factor(test$ekijyouka)
sapply(gesui, class)
## slope uedokaburi masuhonsuu long kubun
## "numeric" "numeric" "numeric" "numeric" "factor"
## did kouhou ekijyouka kyouyounensuu kei
## "factor" "numeric" "factor" "numeric" "numeric"
## taisyo
## "factor"
summary(gesui)
## slope uedokaburi masuhonsuu long kubun
## Min. :0.000 Min. : 1.009 Min. : 0.000 Min. : 0.97 1:200
## 1st Qu.:1.900 1st Qu.: 2.476 1st Qu.: 0.000 1st Qu.:20.66 2: 54
## Median :2.765 Median : 3.409 Median : 1.000 Median :30.00
## Mean :3.337 Mean : 4.206 Mean : 1.232 Mean :31.39
## 3rd Qu.:4.173 3rd Qu.: 5.397 3rd Qu.: 2.000 3rd Qu.:41.04
## Max. :9.900 Max. :13.385 Max. :11.000 Max. :96.82
## did kouhou ekijyouka kyouyounensuu kei taisyo
## 0: 64 Min. :0.0000 0:204 Min. :10.00 Min. :200.0 0:178
## 1:190 1st Qu.:0.0000 1: 50 1st Qu.:25.00 1st Qu.:250.0 1: 76
## Median :0.0000 Median :25.00 Median :250.0
## Mean :0.3307 Mean :27.35 Mean :391.9
## 3rd Qu.:1.0000 3rd Qu.:27.00 3rd Qu.:600.0
## Max. :1.0000 Max. :40.00 Max. :900.0
train <- gesui
stargazer(as.data.frame(gesui),type = "html")
Statistic | N | Mean | St. Dev. | Min | Pctl(25) | Pctl(75) | Max |
slope | 254 | 3.337 | 2.024 | 0.000 | 1.900 | 4.173 | 9.900 |
uedokaburi | 254 | 4.206 | 2.546 | 1.009 | 2.476 | 5.397 | 13.385 |
masuhonsuu | 254 | 1.232 | 1.748 | 0 | 0 | 2 | 11 |
long | 254 | 31.387 | 15.583 | 0.970 | 20.660 | 41.043 | 96.820 |
kouhou | 254 | 0.331 | 0.471 | 0 | 0 | 1 | 1 |
kyouyounensuu | 254 | 27.354 | 5.069 | 10 | 25 | 27 | 40 |
kei | 254 | 391.929 | 163.533 | 200 | 250 | 600 | 900 |
stargazer(as.data.frame(train),type = "html")
Statistic | N | Mean | St. Dev. | Min | Pctl(25) | Pctl(75) | Max |
slope | 254 | 3.337 | 2.024 | 0.000 | 1.900 | 4.173 | 9.900 |
uedokaburi | 254 | 4.206 | 2.546 | 1.009 | 2.476 | 5.397 | 13.385 |
masuhonsuu | 254 | 1.232 | 1.748 | 0 | 0 | 2 | 11 |
long | 254 | 31.387 | 15.583 | 0.970 | 20.660 | 41.043 | 96.820 |
kouhou | 254 | 0.331 | 0.471 | 0 | 0 | 1 | 1 |
kyouyounensuu | 254 | 27.354 | 5.069 | 10 | 25 | 27 | 40 |
kei | 254 | 391.929 | 163.533 | 200 | 250 | 600 | 900 |
stargazer(as.data.frame(test),type = "html")
Statistic | N | Mean | St. Dev. | Min | Pctl(25) | Pctl(75) | Max |
slope | 28 | 3.056 | 1.964 | 1.110 | 1.875 | 3.558 | 9.400 |
uedokaburi | 28 | 4.328 | 2.828 | 1.319 | 2.406 | 5.399 | 12.289 |
masuhonsuu | 28 | 1.750 | 1.878 | 0 | 0 | 2.5 | 6 |
long | 28 | 30.505 | 12.764 | 4.170 | 25.707 | 34.130 | 57.240 |
kouhou | 28 | 0.393 | 0.497 | 0 | 0 | 1 | 1 |
kyouyounensuu | 28 | 28.964 | 6.221 | 24 | 25 | 30.2 | 40 |
kei | 28 | 375.000 | 152.449 | 250 | 250 | 525 | 600 |
#model = randomForest(taisyo ~ ., data = gesui)
model = randomForest(taisyo ~ ., data = train)
#model = randomForest(kinkyuudo ~ ., data = gesui)
model
##
## Call:
## randomForest(formula = taisyo ~ ., data = train)
## Type of random forest: classification
## Number of trees: 500
## No. of variables tried at each split: 3
##
## OOB estimate of error rate: 19.29%
## Confusion matrix:
## 0 1 class.error
## 0 167 11 0.06179775
## 1 38 38 0.50000000
#predition = predict(model, gesui)
predition = predict(model, test)
predition
## 5 17 27 36 59 95 109 122 123 127 129 134 137 154 191 193 195 203 209 213
## 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 1 1 1 0 1
## 216 235 238 246 257 259 260 269
## 1 0 1 0 0 0 0 0
## Levels: 0 1
summary(predition)
## 0 1
## 20 8
table(predition,test$taisyo)
##
## predition 0 1
## 0 14 6
## 1 2 6
#sapply(gesui, class)
#summary(gesui)
model$importance
## MeanDecreaseGini
## slope 17.029285
## uedokaburi 21.763576
## masuhonsuu 5.936029
## long 23.536333
## kubun 2.052436
## did 2.518952
## kouhou 1.427450
## ekijyouka 2.976061
## kyouyounensuu 18.142166
## kei 6.381062
varImpPlot(model)
dim(gesui)
## [1] 254 11
sapply(gesui, class)
## slope uedokaburi masuhonsuu long kubun
## "numeric" "numeric" "numeric" "numeric" "factor"
## did kouhou ekijyouka kyouyounensuu kei
## "factor" "numeric" "factor" "numeric" "numeric"
## taisyo
## "factor"
head(gesui)
## slope uedokaburi masuhonsuu long kubun did kouhou ekijyouka kyouyounensuu
## 223 1.34 3.484858 4 20.20 1 1 0 0 25
## 15 1.74 1.436539 3 9.25 2 1 0 1 39
## 180 2.13 2.660371 2 29.31 1 1 0 0 40
## 214 2.32 4.574894 1 30.18 2 1 0 0 40
## 57 3.90 2.796943 2 30.01 2 1 0 1 37
## 192 2.30 2.346045 2 29.68 1 1 0 0 40
## kei taisyo
## 223 250 0
## 15 250 0
## 180 250 1
## 214 250 1
## 57 250 1
## 192 250 1
set.seed(123)#注1
#gesui.tune <- tuneRF(gesui %>% select(-kinkyuudo) ,# 説明変数
# gesui$kinkyuudo, # 目的変数
gesui.tune <- tuneRF(gesui %>% select(-taisyo) ,# 説明変数
gesui$taisyo, # 目的変数
doBest = T) #分岐に使う変数の数(mtry)を求めるフラグ
## mtry = 3 OOB error = 18.11%
## Searching left ...
## mtry = 2 OOB error = 18.11%
## 0 0.05
## Searching right ...
## mtry = 6 OOB error = 18.9%
## -0.04347826 0.05
set.seed(123)#注1 #gesui.tune <- tuneRF(gesui %>% select(-kinkyuudo) ,# 説明変数 # gesui\(kinkyuudo, # 目的変数 gesui.tune <- tuneRF(gesui %>% select(-taisyo),# 説明変数 gesui\)taisyo,# 目的変数 doBest = T)#分岐に使う変数の数(mtry)を求めるフラグ
この結果,特徴量の個数が 3個以上のときに,Out-of-Bag誤差(OOB error)は3.99% 2個のときに,Out-of-Bag誤差は4.57%、 1個のときに,Out-of-Bag誤差は4.36%、
となり,特徴量の個数が3個のときにOut-of-Bag誤差が最少となり, この個数に設定するのが良さそうであることがわかる*1
構築する決定木の個数を増やしてみる ntreeTry引数はデフォルトでは50となっており,50個の決定木を構築することがわかる.1500個の決定木を構築するように指定してみよう.
set.seed(123)#注1
#gesui.tune <- tuneRF(gesui %>% select(-kinkyuudo) ,# 説明変数
# gesui$kinkyuudo, # 目的変数
gesui.tune <- tuneRF(gesui %>% select(-taisyo) ,# 説明変数
gesui$taisyo, # 目的変数
ntreeTry=2500, #決定木数
trace = TRUE,
doBest = T)
## mtry = 3 OOB error = 18.5%
## Searching left ...
## mtry = 2 OOB error = 18.9%
## -0.0212766 0.05
## Searching right ...
## mtry = 6 OOB error = 18.5%
## 0 0.05
6個のときに,Out-of-Bag誤差(OOB error)が最大となり、5.06%となっている。
チューニングで求めたmtry(tuneRF()結果を、オブジェクトの$mtryに入っています)はこの関数の引数に代入します。
gesui.rf2 <- randomForest( # 予測、分類器の構築
# kinkyuudo ~ ., # モデル式
taisyo ~ ., # モデル式
data = gesui, # データ
mtry = gesui.tune$mtry) # 分岐に使う変数の数
predrandam = predict(gesui.rf2, test)
predrandam
## 5 17 27 36 59 95 109 122 123 127 129 134 137 154 191 193 195 203 209 213
## 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 1 1 1 0 1
## 216 235 238 246 257 259 260 269
## 1 0 1 0 0 0 0 0
## Levels: 0 1
summary(predrandam)
## 0 1
## 20 8
table(predrandam,test$taisyo)
##
## predrandam 0 1
## 0 14 6
## 1 2 6
下水劣化推定変数の重要度
gesui.rf2
##
## Call:
## randomForest(formula = taisyo ~ ., data = gesui, mtry = gesui.tune$mtry)
## Type of random forest: classification
## Number of trees: 500
## No. of variables tried at each split: 3
##
## OOB estimate of error rate: 17.72%
## Confusion matrix:
## 0 1 class.error
## 0 168 10 0.05617978
## 1 35 41 0.46052632
x=gesui.rf2$importance
x
## MeanDecreaseGini
## slope 17.630621
## uedokaburi 21.741710
## masuhonsuu 6.025033
## long 21.856484
## kubun 2.095913
## did 2.514011
## kouhou 1.421411
## ekijyouka 3.103169
## kyouyounensuu 18.940177
## kei 6.265901
http://sfchaos.hatenablog.com/entry/20150628/p1 https://tjo.hatenablog.com/entry/2013/09/02/190449
出力結果の読み方 OOB estimate of error rate:誤判別率 Confusion matrix:縦軸が予測数、横軸が実際の数。下の例では”0”(緊急度3以下)と478個予測したうち、実際に”0”だったものが450個、“1”だったものが28個と読み取れます。
重要度の高い順番に並び替え
rank <- data.frame(x) # 重要度のリストをデータフレームに変換
rank$factor <- rownames(rank) # 行名になっている要因をデータフレームに追加
rank <- rank[order(rank[,1], decreasing=T),] # 重要度(偏回帰係数的なもの)順に並び替え
rownames(rank) <- 1:nrow(rank) # ランキングを行名にする
rank
## MeanDecreaseGini factor
## 1 21.856484 long
## 2 21.741710 uedokaburi
## 3 18.940177 kyouyounensuu
## 4 17.630621 slope
## 5 6.265901 kei
## 6 6.025033 masuhonsuu
## 7 3.103169 ekijyouka
## 8 2.514011 did
## 9 2.095913 kubun
## 10 1.421411 kouhou
plot(gesui.rf2)
varImpPlot(gesui.rf2)
参考 https://yolo-kiyoshi.com/2019/09/16/post-1226/ https://aotamasaki.hatenablog.com/entry/bias_in_feature_importances
# 別のサイトでのランダムフォレストによるEDAをRで実践 https://navaclass.com/random-forest-eda/
#set.seed(111)
#ランダムフォレストモデルの学習
#boston.rf <- randomForest(kinkyuudo ~ .,
#boston.rf <- randomForest(taisyo ~ .,
# data = train,
# importance = TRUE)
#テストデータに対する予測
#pred <- predict(boston.rf, newdata = test)
#観測値と予測値をプロット
#plot(test$taisyo, pred, main = boston.rf$call)
#curve(identity, add = TRUE)
pred = predict(gesui.rf2, test)
plot(test$taisyo, pred, main = gesui.rf2$call)
curve(identity, add = TRUE)
#予測誤差(RMSE:二乗平均平方根誤差)
#予測誤差の推定のため目的変数をニューリックに変換する。
rms <- function(act, pred) {
sqrt(mean((act - pred) ^ 2))
}
cat(" RMSE =", rms(test$taisyo, pred))
## Warning in Ops.factor(act, pred): '-' not meaningful for factors
## RMSE = NA
#線形回帰モデルの予測誤差と比較
cat(" RMSE = ",
rms(test$taisyo,
predict(lm(taisyo ~ ., data = train), newdata = test)))
## Warning in model.response(mf, "numeric"): using type = "numeric" with a factor
## response will be ignored
## Warning in Ops.factor(y, z$residuals): '-' not meaningful for factors
## Warning in Ops.factor(act, pred): '-' not meaningful for factors
## RMSE = NA
https://funatsu-lab.github.io/open-course-ware/machine-learning/random-forest/
#特徴量重要度の出力 type = 1
boston.imp <-
sort(gesui.rf2$importance, decreasing = TRUE)
barplot(boston.imp, names.arg = rownames(boston.imp))
ビジネスに活かすデータマイニング(尾崎豊) http://yut.hatenablog.com/entry/20120827/1346024147
stargazer(as.data.frame(gesui),type = "html")
Statistic | N | Mean | St. Dev. | Min | Pctl(25) | Pctl(75) | Max |
slope | 254 | 3.337 | 2.024 | 0.000 | 1.900 | 4.173 | 9.900 |
uedokaburi | 254 | 4.206 | 2.546 | 1.009 | 2.476 | 5.397 | 13.385 |
masuhonsuu | 254 | 1.232 | 1.748 | 0 | 0 | 2 | 11 |
long | 254 | 31.387 | 15.583 | 0.970 | 20.660 | 41.043 | 96.820 |
kouhou | 254 | 0.331 | 0.471 | 0 | 0 | 1 | 1 |
kyouyounensuu | 254 | 27.354 | 5.069 | 10 | 25 | 27 | 40 |
kei | 254 | 391.929 | 163.533 | 200 | 250 | 600 | 900 |
library(e1071)
d.svm<-svm(taisyo ~ ., data = train)
print(d.svm)
##
## Call:
## svm(formula = taisyo ~ ., data = train)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: radial
## cost: 1
##
## Number of Support Vectors: 140
predsvm<-predict(d.svm,newdata=test)
summary(predsvm)
## 0 1
## 20 8
kekka<-table(predsvm,test$taisyo)
kekka
##
## predsvm 0 1
## 0 14 6
## 1 2 6
library( nnet )
予測式
nn<-nnet(taisyo ~., data=train,size = 2, rang = .1, decay = 5e-4, maxit = 200 )
## # weights: 25
## initial value 166.866381
## iter 10 value 139.788985
## iter 20 value 122.976109
## iter 30 value 121.705225
## iter 40 value 121.700819
## iter 50 value 121.693387
## iter 60 value 121.687739
## iter 70 value 121.597941
## iter 80 value 120.776591
## iter 90 value 110.107610
## iter 100 value 108.928885
## iter 110 value 106.747153
## iter 120 value 106.273506
## iter 130 value 106.162321
## iter 140 value 105.967551
## iter 150 value 105.795665
## iter 160 value 105.726490
## iter 170 value 105.666118
## iter 180 value 105.579543
## iter 190 value 105.552047
## iter 200 value 105.546545
## final value 105.546545
## stopped after 200 iterations
nn
## a 10-2-1 network with 25 weights
## inputs: slope uedokaburi masuhonsuu long kubun2 did1 kouhou ekijyouka1 kyouyounensuu kei
## output(s): taisyo
## options were - entropy fitting decay=5e-04
nn_predict<-predict(nn,test,type="class")
table(nn_predict, test$taisyo)
##
## nn_predict 0 1
## 0 14 9
## 1 2 3
cat(test$taisyo, file = "testtaisyo2.txt", append =FALSE)
cat(nn_predict, file = "nnresult2.txt", append =FALSE)
nn_predict<-predict(nn,test,type=“raw”) nn_predict#推定値の生データ出力:https://mjin.doshisha.ac.jp/R/Chap_23/23.html nn_predict<-predict(nn,test,type=“class”)#推定値のグループ出力 #推定値グループのファイルテキスト出力http://takenaka-akio.org/doc/r_auto/chapter_03.html nn_predict cat(test\(taisyo, file = "testtaisyo.txt", append =FALSE) cat(predrandam, file = "lfresult.txt", append =FALSE) cat(predsvm, file = "svmresult.txt", append =FALSE) cat(nn_predict, file = "nnresult.txt", append =FALSE) kekka<-table(nn_predict, test\)taisyo) kekka