library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
data2<-read.csv("C:/Users/Asus/Desktop/air_qulaity(matzu).csv")
data2<-data2[,-9]
#將變數轉換成連續或類別變項

attach(data2)
data2<-data.frame(data2)
str(data2)
## 'data.frame':    914 obs. of  12 variables:
##  $ SO2     : Factor w/ 51 levels "-","0.50 ","0.60 ",..: 22 24 17 18 21 19 6 12 17 18 ...
##  $ CO      : Factor w/ 65 levels "-","0.03 ","0.05 ",..: 17 14 14 36 18 27 24 17 20 21 ...
##  $ O3      : Factor w/ 443 levels "-","101.70 ",..: 61 42 67 77 57 116 260 273 311 265 ...
##  $ PM.10   : Factor w/ 99 levels "-","10.00 ","100.00 ",..: 48 39 38 47 44 36 40 34 40 29 ...
##  $ PM.2.5  : Factor w/ 64 levels "-","1.00 ","10.00 ",..: 15 8 8 17 18 9 8 7 15 9 ...
##  $ NOx     : Factor w/ 24 levels "-","0.00 ","1.00 ",..: 22 21 20 23 5 24 20 20 21 20 ...
##  $ NO      : Factor w/ 241 levels "-","0.15 ","0.18 ",..: 137 66 99 171 199 224 77 73 114 60 ...
##  $ NO2     : Factor w/ 563 levels "-","0.00 ","0.51 ",..: 329 304 230 358 498 352 175 224 250 187 ...
##  $ WS_HR   : Factor w/ 438 levels "-","0.27 ","0.40 ",..: 398 435 430 218 68 245 419 368 186 223 ...
##  $ AMB_TEMP: Factor w/ 724 levels "-","10.00 ","10.04 ",..: 409 363 382 274 338 230 120 47 59 91 ...
##  $ RH      : Factor w/ 788 levels "-","49.76 ","51.16 ",..: 232 295 314 631 592 701 364 225 327 521 ...
##  $ PH_RAIN : num  0 0 0 0 0 0 0 4.31 4.4 4.42 ...
temp = as.matrix(data2$SO2)
NAAA=which(temp == '-');NAAA
## [1] 762
data2 = data2[-NAAA,]
data2$SO2= as.numeric(as.matrix(data2$SO2))
temp1 = as.matrix(data2$NOx)
NAAA=which(temp1 == '-');NAAA
##  [1] 365 431 434 448 449 491 587 588 589 590 597 764 825 826
data2 = data2[-NAAA,]
data2$CO = as.numeric(as.matrix(data2$CO))
data2$O3 = as.numeric(as.matrix(data2$O3))
temp1=as.matrix(data2$PM.10)
NAAA1=which((temp1 == '-'))
data2 = data2[-NAAA1,]
data2$PM.10 = as.numeric(as.matrix(data2$PM.10))
temp2 = as.matrix(data2$PM.2.5)
NAAA2=which(temp2 == '-');NAAA2
##  [1] 112 113 114 115 116 117 118 119 120 121 390 391 392 393 394 395 396
## [18] 397 398 399 400 401 402 403 404 405 406 674 675
data2 = data2[-NAAA2,]
data2$PM.2.5 = as.numeric(as.matrix(data2$PM.2.5))
data2$NOx = as.numeric(as.matrix(data2$NOx))
data2$NO = as.numeric(as.matrix(data2$NO))
data2$NO2 = as.numeric(as.matrix(data2$NO2))

data2$WS_HR = as.numeric(as.matrix(data2$WS_HR))
data2$AMB_TEMP = as.numeric(as.matrix(data2$AMB_TEMP))
data2$PH_RAIN = as.numeric(as.matrix(data2$PH_RAIN))
data2$RH = as.numeric(as.matrix(data2$RH))
str(data2)
## 'data.frame':    864 obs. of  12 variables:
##  $ SO2     : num  2.5 2.7 2 2.1 2.4 2.2 0.9 1.6 2 2.1 ...
##  $ CO      : num  0.19 0.16 0.16 0.38 0.2 0.29 0.26 0.19 0.22 0.23 ...
##  $ O3      : num  25.2 22.7 25.8 27.3 24.7 32.2 48.1 49.5 53.9 48.6 ...
##  $ PM.10   : num  45 36 35 44 41 33 37 31 37 26 ...
##  $ PM.2.5  : num  21 15 15 23 24 16 15 14 21 16 ...
##  $ NOx     : num  7 6 5 8 11 9 5 5 6 5 ...
##  $ NO      : num  1.65 0.93 1.26 2.01 2.4 2.98 1.04 1 1.42 0.87 ...
##  $ NO2     : num  5.5 5.18 4.2 5.87 8.25 5.79 3.43 4.12 4.49 3.61 ...
##  $ WS_HR   : num  5.73 7.31 6.8 3.34 1.62 3.64 6.07 5.25 2.99 3.39 ...
##  $ AMB_TEMP: num  23.2 22.2 22.7 19.5 21.5 ...
##  $ RH      : num  78.8 81.2 82 90.2 89 ...
##  $ PH_RAIN : num  0 0 0 0 0 0 0 4.31 4.4 4.42 ...
temp3 = as.matrix(data2$PH_RAIN)
NO_RAIN=which(temp3 == '0');NO_RAIN
##   [1]   1   2   3   4   5   6   7  12  13  14  15  16  17  19  22  24  25
##  [18]  26  27  29  30  33  34  35  36  37  38  39  40  41  42  43  44  47
##  [35]  48  49  50  51  52  53  54  55  56  57  58  59  60  61  62  67  68
##  [52]  70  71  72  75  76  77  79  80  81  82  84  91  93  94  96  97  98
##  [69]  99 100 101 102 103 104 105 108 110 111 112 113 116 117 118 119 120
##  [86] 121 122 123 124 125 128 129 130 131 132 135 137 138 139 140 144 145
## [103] 147 150 151 152 153 154 155 156 157 159 160 161 162 163 164 165 167
## [120] 168 169 172 175 176 179 180 181 182 183 184 190 191 193 194 195 196
## [137] 197 200 201 202 205 206 207 208 209 211 215 220 223 224 225 226 227
## [154] 228 229 230 231 233 234 237 238 239 240 241 242 243 246 247 249 250
## [171] 251 252 254 255 258 261 262 266 269 271 272 273 274 275 276 277 279
## [188] 280 283 284 285 289 297 298 299 305 306 307 308 310 311 312 313 314
## [205] 315 317 318 319 320 323 324 325 329 330 331 332 333 334 337 340 343
## [222] 344 345 346 347 351 355 356 357 358 359 360 361 362 363 364 365 367
## [239] 368 369 370 371 372 373 377 380 381 382 383 384 385 386 387 391 392
## [256] 393 394 395 396 397 398 399 400 401 402 404 405 406 407 409 410 411
## [273] 413 417 420 422 423 424 425 426 427 428 429 430 433 434 435 436 437
## [290] 438 442 445 446 447 448 449 453 455 457 458 459 460 461 462 463 464
## [307] 465 466 467 468 469 470 471 474 475 477 478 479 480 481 482 487 488
## [324] 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507
## [341] 508 511 512 513 514 516 517 518 519 520 521 522 523 524 525 526 527
## [358] 528 529 530 533 534 535 540 541 542 543 544 545 546 547 548 549 550
## [375] 551 552 553 554 555 556 557 558 559 560 561 562 563 564 566 571 572
## [392] 573 574 575 576 577 580 581 582 584 588 590 593 594 597 599 600 601
## [409] 603 606 607 608 609 610 612 613 617 618 619 620 627 628 632 633 634
## [426] 635 636 637 638 639 641 643 644 646 648 649 650 651 652 653 654 656
## [443] 657 658 659 660 661 662 664 665 666 667 668 670 671 672 673 674 684
## [460] 685 686 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705
## [477] 706 707 708 709 710 711 712 713 714 715 716 718 719 720 721 722 723
## [494] 724 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741
## [511] 742 743 744 745 746 747 748 749 750 751 752 754 755 756 757 758 759
## [528] 760 761 762 763 764 766 767 768 769 770 771 775 776 777 779 780 781
## [545] 782 783 785 786 789 790 792 793 794 795 796 797 799 800 801 802 803
## [562] 804 806 807 808 809 810 812 813 814 815 816 817 819 821 824 827 829
## [579] 830 831 832 834 835 836 837 838 839 840 841 842 843 844 845 846 850
## [596] 851 852 853 854 855 856 857 858 859 860 861 862 863 864
data3 = data2[-NO_RAIN,]
data3$PH_RAIN<-ifelse(data3$PH_RAIN<5,1,0)
# 挑出70% 的資料進行建模 #
n <- nrow(data3)
set.seed(106354012)
t_idx <- sample(seq_len(n), size = round(0.7 * n))
data_train <- data3[t_idx,] %>% as.data.frame()
data_test <- data3[ - t_idx,] %>% as.data.frame()
#開始建模
model<-glm(PH_RAIN~.,family = binomial("logit"),data=data_train)
summary(model)
## 
## Call:
## glm(formula = PH_RAIN ~ ., family = binomial("logit"), data = data_train)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.3930  -0.7803   0.5178   0.7803   2.0157  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)   
## (Intercept) -3.78897    3.92658  -0.965  0.33457   
## SO2          0.15580    0.39294   0.396  0.69175   
## CO          -0.42058    2.33721  -0.180  0.85719   
## O3           0.06353    0.02275   2.793  0.00522 **
## PM.10       -0.04845    0.03351  -1.446  0.14822   
## PM.2.5       0.11091    0.05181   2.141  0.03231 * 
## NOx          0.00898    0.65526   0.014  0.98907   
## NO           0.01581    0.69675   0.023  0.98190   
## NO2          0.08808    0.66027   0.133  0.89387   
## WS_HR        0.30765    0.19097   1.611  0.10718   
## AMB_TEMP    -0.11967    0.05005  -2.391  0.01680 * 
## RH           0.02548    0.03629   0.702  0.48259   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 227.51  on 177  degrees of freedom
## Residual deviance: 179.92  on 166  degrees of freedom
## AIC: 203.92
## 
## Number of Fisher Scoring iterations: 4
#
step(model,direction = "both")
## Start:  AIC=203.92
## PH_RAIN ~ SO2 + CO + O3 + PM.10 + PM.2.5 + NOx + NO + NO2 + WS_HR + 
##     AMB_TEMP + RH
## 
##            Df Deviance    AIC
## - NOx       1   179.92 201.92
## - NO        1   179.92 201.92
## - NO2       1   179.94 201.94
## - CO        1   179.95 201.95
## - SO2       1   180.08 202.08
## - RH        1   180.41 202.41
## <none>          179.92 203.92
## - PM.10     1   182.04 204.04
## - WS_HR     1   182.64 204.64
## - PM.2.5    1   184.81 206.81
## - AMB_TEMP  1   186.13 208.13
## - O3        1   188.64 210.64
## 
## Step:  AIC=201.92
## PH_RAIN ~ SO2 + CO + O3 + PM.10 + PM.2.5 + NO + NO2 + WS_HR + 
##     AMB_TEMP + RH
## 
##            Df Deviance    AIC
## - NO        1   179.93 199.93
## - CO        1   179.95 199.95
## - SO2       1   180.08 200.08
## - RH        1   180.41 200.41
## - NO2       1   180.56 200.56
## <none>          179.92 201.92
## - PM.10     1   182.04 202.04
## - WS_HR     1   182.64 202.64
## + NOx       1   179.92 203.92
## - PM.2.5    1   184.84 204.84
## - AMB_TEMP  1   186.13 206.13
## - O3        1   188.67 208.67
## 
## Step:  AIC=199.93
## PH_RAIN ~ SO2 + CO + O3 + PM.10 + PM.2.5 + NO2 + WS_HR + AMB_TEMP + 
##     RH
## 
##            Df Deviance    AIC
## - CO        1   179.96 197.96
## - SO2       1   180.08 198.08
## - RH        1   180.42 198.42
## - NO2       1   180.68 198.68
## <none>          179.93 199.93
## - PM.10     1   182.34 200.34
## - WS_HR     1   182.69 200.69
## + NO        1   179.92 201.92
## + NOx       1   179.92 201.92
## - PM.2.5    1   185.19 203.19
## - AMB_TEMP  1   186.45 204.45
## - O3        1   188.75 206.75
## 
## Step:  AIC=197.96
## PH_RAIN ~ SO2 + O3 + PM.10 + PM.2.5 + NO2 + WS_HR + AMB_TEMP + 
##     RH
## 
##            Df Deviance    AIC
## - SO2       1   180.15 196.15
## - RH        1   180.44 196.44
## - NO2       1   180.70 196.70
## <none>          179.96 197.96
## - PM.10     1   182.38 198.38
## - WS_HR     1   182.83 198.83
## + CO        1   179.93 199.93
## + NO        1   179.95 199.95
## + NOx       1   179.95 199.95
## - PM.2.5    1   185.29 201.29
## - AMB_TEMP  1   187.38 203.38
## - O3        1   188.75 204.75
## 
## Step:  AIC=196.15
## PH_RAIN ~ O3 + PM.10 + PM.2.5 + NO2 + WS_HR + AMB_TEMP + RH
## 
##            Df Deviance    AIC
## - RH        1   180.54 194.54
## - NO2       1   181.19 195.19
## <none>          180.15 196.15
## - PM.10     1   182.47 196.47
## - WS_HR     1   182.93 196.93
## + SO2       1   179.96 197.96
## + CO        1   180.08 198.08
## + NO        1   180.15 198.15
## + NOx       1   180.15 198.15
## - PM.2.5    1   185.79 199.79
## - AMB_TEMP  1   187.85 201.85
## - O3        1   188.84 202.84
## 
## Step:  AIC=194.54
## PH_RAIN ~ O3 + PM.10 + PM.2.5 + NO2 + WS_HR + AMB_TEMP
## 
##            Df Deviance    AIC
## - NO2       1   181.93 193.93
## <none>          180.54 194.54
## - PM.10     1   183.08 195.08
## - WS_HR     1   183.16 195.16
## + RH        1   180.15 196.15
## + SO2       1   180.44 196.44
## + CO        1   180.49 196.49
## + NOx       1   180.54 196.54
## + NO        1   180.54 196.54
## - PM.2.5    1   185.90 197.90
## - AMB_TEMP  1   187.89 199.89
## - O3        1   189.12 201.12
## 
## Step:  AIC=193.93
## PH_RAIN ~ O3 + PM.10 + PM.2.5 + WS_HR + AMB_TEMP
## 
##            Df Deviance    AIC
## - WS_HR     1   183.22 193.22
## - PM.10     1   183.54 193.54
## <none>          181.93 193.93
## + NO2       1   180.54 194.54
## + NOx       1   180.66 194.66
## + RH        1   181.19 195.19
## + SO2       1   181.59 195.59
## + NO        1   181.77 195.77
## + CO        1   181.91 195.91
## - PM.2.5    1   187.08 197.08
## - O3        1   189.57 199.57
## - AMB_TEMP  1   193.18 203.18
## 
## Step:  AIC=193.22
## PH_RAIN ~ O3 + PM.10 + PM.2.5 + AMB_TEMP
## 
##            Df Deviance    AIC
## - PM.10     1   183.93 191.93
## <none>          183.22 193.22
## + WS_HR     1   181.93 193.93
## + RH        1   182.93 194.93
## - PM.2.5    1   187.08 195.08
## + SO2       1   183.14 195.14
## + CO        1   183.15 195.15
## + NO2       1   183.16 195.16
## + NOx       1   183.19 195.19
## + NO        1   183.20 195.20
## - O3        1   190.10 198.10
## - AMB_TEMP  1   195.57 203.57
## 
## Step:  AIC=191.93
## PH_RAIN ~ O3 + PM.2.5 + AMB_TEMP
## 
##            Df Deviance    AIC
## <none>          183.93 191.93
## + PM.10     1   183.22 193.22
## + RH        1   183.49 193.49
## + WS_HR     1   183.54 193.54
## + NOx       1   183.86 193.86
## + NO2       1   183.87 193.87
## + SO2       1   183.88 193.88
## + NO        1   183.88 193.88
## + CO        1   183.90 193.90
## - PM.2.5    1   189.28 195.28
## - O3        1   191.18 197.18
## - AMB_TEMP  1   202.45 208.45
## 
## Call:  glm(formula = PH_RAIN ~ O3 + PM.2.5 + AMB_TEMP, family = binomial("logit"), 
##     data = data_train)
## 
## Coefficients:
## (Intercept)           O3       PM.2.5     AMB_TEMP  
##     0.89075      0.04475      0.05050     -0.13787  
## 
## Degrees of Freedom: 177 Total (i.e. Null);  174 Residual
## Null Deviance:       227.5 
## Residual Deviance: 183.9     AIC: 191.9
model2<- glm(formula = PH_RAIN ~ O3 + PM.2.5 + AMB_TEMP, family = binomial("logit"), 
             data = data_train)
summary(model2)
## 
## Call:
## glm(formula = PH_RAIN ~ O3 + PM.2.5 + AMB_TEMP, family = binomial("logit"), 
##     data = data_train)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.6070  -0.8582   0.4918   0.8020   1.8447  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  0.89075    0.94479   0.943  0.34579    
## O3           0.04475    0.01719   2.604  0.00922 ** 
## PM.2.5       0.05050    0.02294   2.201  0.02774 *  
## AMB_TEMP    -0.13787    0.03432  -4.017 5.91e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 227.51  on 177  degrees of freedom
## Residual deviance: 183.93  on 174  degrees of freedom
## AIC: 191.93
## 
## Number of Fisher Scoring iterations: 4
library(car)
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
vif(model2)
##       O3   PM.2.5 AMB_TEMP 
## 1.107959 1.095418 1.014624
anova(model,test = "Chisq")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: PH_RAIN
## 
## Terms added sequentially (first to last)
## 
## 
##          Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
## NULL                       177     227.51              
## SO2       1   0.0668       176     227.44 0.7960752    
## CO        1  16.7205       175     210.72 4.331e-05 ***
## O3        1  12.2587       174     198.47 0.0004631 ***
## PM.10     1   0.0368       173     198.43 0.8479026    
## PM.2.5    1   6.0074       172     192.42 0.0142459 *  
## NOx       1   0.0239       171     192.40 0.8770913    
## NO        1   1.4858       170     190.91 0.2228634    
## NO2       1   0.0080       169     190.90 0.9286556    
## WS_HR     1   4.7593       168     186.15 0.0291397 *  
## AMB_TEMP  1   5.7373       167     180.41 0.0166084 *  
## RH        1   0.4894       166     179.92 0.4842123    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model3<-glm(formula = PH_RAIN ~ CO + O3 + AMB_TEMP,family = binomial("logit"), data = data_train )
summary(model3)
## 
## Call:
## glm(formula = PH_RAIN ~ CO + O3 + AMB_TEMP, family = binomial("logit"), 
##     data = data_train)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.2136  -0.8762   0.5149   0.8086   1.7502  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  0.49739    1.28188   0.388 0.698004    
## CO           1.72859    1.81168   0.954 0.340013    
## O3           0.05614    0.01664   3.373 0.000743 ***
## AMB_TEMP    -0.12347    0.04068  -3.035 0.002407 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 227.51  on 177  degrees of freedom
## Residual deviance: 188.32  on 174  degrees of freedom
## AIC: 196.32
## 
## Number of Fisher Scoring iterations: 4
anova(model3,test = "Chisq")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: PH_RAIN
## 
## Terms added sequentially (first to last)
## 
## 
##          Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
## NULL                       177     227.51              
## CO        1   16.787       176     210.72 4.181e-05 ***
## O3        1   12.258       175     198.47 0.0004633 ***
## AMB_TEMP  1   10.143       174     188.32 0.0014483 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
vif(model3)
##       CO       O3 AMB_TEMP 
## 1.466196 1.015427 1.475001
outlier.test(model)
## Warning: 'outlier.test' is deprecated.
## Use 'outlierTest' instead.
## See help("Deprecated") and help("car-deprecated").
## 
## No Studentized residuals with Bonferonni p < 0.05
## Largest |rstudent|:
##      rstudent unadjusted p-value Bonferonni p
## 279 -2.571514           0.010126           NA
attach(data_train)
## The following objects are masked from data2:
## 
##     AMB_TEMP, CO, NO, NO2, NOx, O3, PH_RAIN, PM.10, PM.2.5, RH,
##     SO2, WS_HR
#
model4<-glm(PH_RAIN ~ (CO + O3 + AMB_TEMP):(CO + O3 + AMB_TEMP):(CO + O3 + AMB_TEMP),
            family = binomial(link="logit"),data=data_train)
summary(model4)
## 
## Call:
## glm(formula = PH_RAIN ~ (CO + O3 + AMB_TEMP):(CO + O3 + AMB_TEMP):(CO + 
##     O3 + AMB_TEMP), family = binomial(link = "logit"), data = data_train)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.1953  -0.9270   0.5136   0.8316   1.7895  
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)
## (Intercept)      2.925875   7.943214   0.368    0.713
## CO             -14.930833  21.561425  -0.692    0.489
## O3              -0.049172   0.226904  -0.217    0.828
## AMB_TEMP        -0.271011   0.325803  -0.832    0.406
## CO:O3            0.567679   0.673701   0.843    0.399
## CO:AMB_TEMP      0.983929   0.986909   0.997    0.319
## O3:AMB_TEMP      0.005609   0.009576   0.586    0.558
## CO:O3:AMB_TEMP  -0.030848   0.029773  -1.036    0.300
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 227.51  on 177  degrees of freedom
## Residual deviance: 186.52  on 170  degrees of freedom
## AIC: 202.52
## 
## Number of Fisher Scoring iterations: 5
step(model4,direction = "both")
## Start:  AIC=202.52
## PH_RAIN ~ (CO + O3 + AMB_TEMP):(CO + O3 + AMB_TEMP):(CO + O3 + 
##     AMB_TEMP)
## 
##                  Df Deviance    AIC
## - CO:O3:AMB_TEMP  1   187.61 201.61
## <none>                186.52 202.52
## 
## Step:  AIC=201.61
## PH_RAIN ~ CO + O3 + AMB_TEMP + CO:O3 + CO:AMB_TEMP + O3:AMB_TEMP
## 
##                  Df Deviance    AIC
## - CO:AMB_TEMP     1   187.62 199.62
## - CO:O3           1   188.12 200.12
## - O3:AMB_TEMP     1   188.18 200.18
## <none>                187.61 201.61
## + CO:O3:AMB_TEMP  1   186.52 202.52
## 
## Step:  AIC=199.61
## PH_RAIN ~ CO + O3 + AMB_TEMP + CO:O3 + O3:AMB_TEMP
## 
##               Df Deviance    AIC
## - CO:O3        1   188.15 198.15
## - O3:AMB_TEMP  1   188.21 198.21
## <none>             187.62 199.62
## + CO:AMB_TEMP  1   187.61 201.61
## 
## Step:  AIC=198.15
## PH_RAIN ~ CO + O3 + AMB_TEMP + O3:AMB_TEMP
## 
##               Df Deviance    AIC
## - O3:AMB_TEMP  1   188.32 196.32
## - CO           1   189.28 197.28
## <none>             188.15 198.15
## + CO:O3        1   187.62 199.62
## + CO:AMB_TEMP  1   188.12 200.12
## 
## Step:  AIC=196.32
## PH_RAIN ~ CO + O3 + AMB_TEMP
## 
##               Df Deviance    AIC
## - CO           1   189.28 195.28
## <none>             188.32 196.32
## + O3:AMB_TEMP  1   188.15 198.15
## + CO:O3        1   188.21 198.21
## + CO:AMB_TEMP  1   188.28 198.28
## - AMB_TEMP     1   198.47 204.47
## - O3           1   200.87 206.87
## 
## Step:  AIC=195.28
## PH_RAIN ~ O3 + AMB_TEMP
## 
##               Df Deviance    AIC
## <none>             189.28 195.28
## + CO           1   188.32 196.32
## + O3:AMB_TEMP  1   189.28 197.28
## - O3           1   203.38 207.38
## - AMB_TEMP     1   211.35 215.35
## 
## Call:  glm(formula = PH_RAIN ~ O3 + AMB_TEMP, family = binomial(link = "logit"), 
##     data = data_train)
## 
## Coefficients:
## (Intercept)           O3     AMB_TEMP  
##     1.35691      0.05797     -0.14656  
## 
## Degrees of Freedom: 177 Total (i.e. Null);  175 Residual
## Null Deviance:       227.5 
## Residual Deviance: 189.3     AIC: 195.3
model5<-glm(formula = PH_RAIN ~ O3 + AMB_TEMP, family = binomial(link = "logit"), 
            data = data_train)
summary(model5)
## 
## Call:
## glm(formula = PH_RAIN ~ O3 + AMB_TEMP, family = binomial(link = "logit"), 
##     data = data_train)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.0373  -0.8956   0.5135   0.8134   1.7046  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  1.35691    0.92432   1.468   0.1421    
## O3           0.05797    0.01637   3.540   0.0004 ***
## AMB_TEMP    -0.14656    0.03383  -4.332 1.47e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 227.51  on 177  degrees of freedom
## Residual deviance: 189.28  on 175  degrees of freedom
## AIC: 195.28
## 
## Number of Fisher Scoring iterations: 4
# 機器學習 #
table(data_train$PH_RAIN)
## 
##   0   1 
##  60 118
a<-118/178#資料集機率層級
modle_train <- glm(PH_RAIN ~ CO + O3 + AMB_TEMP, family = binomial("logit"),data =data_train)
result <- predict(modle_train, newdata = data_test, type = "response")
result_Approved <- ifelse(result >a, 1, 0)
cm <- table(data_test$PH_RAIN, result_Approved, dnn = c("實際", "預測"))
cm
##     預測
## 實際  0  1
##    0 19 11
##    1 12 35
cm[4] / sum(cm[, 2])
## [1] 0.7608696
cm[1] / sum(cm[, 1])
## [1] 0.6129032
accuracy <- sum(diag(cm)) / sum(cm);accuracy
## [1] 0.7012987
# 機器學習 #
modle_train <- glm(PH_RAIN ~ O3 + PM.2.5 + AMB_TEMP, family = binomial("logit"),data =data_train)
result <- predict(modle_train, newdata = data_test, type = "response")
result_Approved <- ifelse(result > a, 1, 0)
cm <- table(data_test$PH_RAIN, result_Approved, dnn = c("實際", "預測"))
cm
##     預測
## 實際  0  1
##    0 19 11
##    1 14 33
cm[4] / sum(cm[, 2])
## [1] 0.75
cm[1] / sum(cm[, 1])
## [1] 0.5757576
accuracy <- sum(diag(cm)) / sum(cm);accuracy
## [1] 0.6753247
#畫ROC曲線
library("ROCR")
## Loading required package: gplots
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
## 
##     lowess
pred <- prediction(result, data_test$PH_RAIN)
perf <- performance(pred, measure = "tpr", x.measure = "fpr")
#計算AUC
auc <- performance(pred, "auc")

#畫圖
plot(perf, col = rainbow(7), main = "ROC curve", xlab = "1 - Specificity(FPR)", ylab = "Sensitivity(TPR)")

abline(0, 1)
#實際AUC值
text(0.5, 0.5, as.character(auc@y.values[[1]]))

#AVPLOTS
avPlots(model2)

#畫盒鬚圖
# c(1,2),表示建立一個2x2的空間,用來呈現後續的圖
par(mfrow = c(1,1),cex=1.2,font=2,col=4) 


boxplot(formula = data_train$SO2 ~ data_train$PH_RAIN , # Y ~ X (代表X和Y軸要放的數值) 
        data = data_train,                         # 資料
        xlab = "是否為酸雨",                  # X軸名稱
        ylab = "SO2濃度 (ppb)",               # Y軸名稱
        col = "gray")                         # 顏色

boxplot(formula = data_train$CO ~ data_train$PH_RAIN ,  # Y ~ X (代表X和Y軸要放的數值) 
        data = data_train,                         # 資料
        xlab = "是否為酸雨",                  # X軸名稱
        ylab = "CO濃度 (ppb)",                # Y軸名稱
        col = "gray")                         # 顏色

boxplot(formula = data_train$O3 ~ data_train$PH_RAIN ,  # Y ~ X (代表X和Y軸要放的數值) 
        data = data_train,                         # 資料
        xlab = "是否為酸雨",                  # X軸名稱
        ylab = "O3濃度 (ppb)",                # Y軸名稱
        col = "gray")                         # 顏色

boxplot(formula = data_train$PM.10 ~ data_train$PH_RAIN , # Y ~ X (代表X和Y軸要放的數值) 
        data = data_train,                         # 資料
        xlab = "是否為酸雨",                  # X軸名稱
        ylab = "PM10濃度 (ppb)",              # Y軸名稱
        col = "gray")                         # 顏色

boxplot(formula = data_train$PM.2.5 ~ data_train$PH_RAIN , # Y ~ X (代表X和Y軸要放的數值) 
        data = data_train,                         # 資料
        xlab = "是否為酸雨",                  # X軸名稱
        ylab = "PM2.5濃度 (ppb)",             # Y軸名稱
        col = "gray")                         # 顏色

boxplot(formula = data_train$NOx ~ data_train$PH_RAIN , # Y ~ X (代表X和Y軸要放的數值) 
        data = data_train,                         # 資料
        xlab = "是否為酸雨",                  # X軸名稱
        ylab = "氮氧化濃度 (ppb)",            # Y軸名稱
        col = "gray")                         # 顏色

boxplot(formula = data_train$NO2 ~ data_train$PH_RAIN ,  # Y ~ X (代表X和Y軸要放的數值) 
        data = data_train,                         # 資料
        xlab = "是否為酸雨",                  # X軸名稱
        ylab = "二氧化氮濃度 (ppb)",          # Y軸名稱
        col = "gray")                         # 顏色

boxplot(formula = data_train$NO ~ data_train$PH_RAIN ,  # Y ~ X (代表X和Y軸要放的數值) 
        data = data_train,                         # 資料
        xlab = "是否為酸雨",                  # X軸名稱
        ylab = "一氧化氮濃度 (ppb)",          # Y軸名稱
        col = "gray")                         # 顏色

boxplot(formula = data_train$WS_HR ~ data_train$PH_RAIN ,# Y ~ X (代表X和Y軸要放的數值) 
        data = data_train,                         # 資料
        xlab = "是否為酸雨",                  # X軸名稱
        ylab = "小時風速值(m/sec)",                  # Y軸名稱
        col = "gray")                         # 顏色

boxplot(formula = data_train$AMB_TEMP ~ data_train$PH_RAIN , # Y ~ X (代表X和Y軸要放的數值) 
        data = data_train,                              # 資料
        xlab = "是否為酸雨",                       # X軸名稱
        ylab = "攝氏溫度(℃)",                      # Y軸名稱
        col =  "gray")                             # 顏色

boxplot(formula = data_train$RH ~ data_train$PH_RAIN , # Y ~ X (代表X和Y軸要放的數值) 
        data = data_train,                              # 資料
        xlab = "是否為酸雨",                       # X軸名稱
        ylab = "相對濕度",                         # Y軸名稱
        col =  "gray")                             # 顏色