library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
data2<-read.csv("C:/Users/Asus/Desktop/air_qulaity(matzu).csv")
data2<-data2[,-9]
#將變數轉換成連續或類別變項
attach(data2)
data2<-data.frame(data2)
str(data2)
## 'data.frame': 914 obs. of 12 variables:
## $ SO2 : Factor w/ 51 levels "-","0.50 ","0.60 ",..: 22 24 17 18 21 19 6 12 17 18 ...
## $ CO : Factor w/ 65 levels "-","0.03 ","0.05 ",..: 17 14 14 36 18 27 24 17 20 21 ...
## $ O3 : Factor w/ 443 levels "-","101.70 ",..: 61 42 67 77 57 116 260 273 311 265 ...
## $ PM.10 : Factor w/ 99 levels "-","10.00 ","100.00 ",..: 48 39 38 47 44 36 40 34 40 29 ...
## $ PM.2.5 : Factor w/ 64 levels "-","1.00 ","10.00 ",..: 15 8 8 17 18 9 8 7 15 9 ...
## $ NOx : Factor w/ 24 levels "-","0.00 ","1.00 ",..: 22 21 20 23 5 24 20 20 21 20 ...
## $ NO : Factor w/ 241 levels "-","0.15 ","0.18 ",..: 137 66 99 171 199 224 77 73 114 60 ...
## $ NO2 : Factor w/ 563 levels "-","0.00 ","0.51 ",..: 329 304 230 358 498 352 175 224 250 187 ...
## $ WS_HR : Factor w/ 438 levels "-","0.27 ","0.40 ",..: 398 435 430 218 68 245 419 368 186 223 ...
## $ AMB_TEMP: Factor w/ 724 levels "-","10.00 ","10.04 ",..: 409 363 382 274 338 230 120 47 59 91 ...
## $ RH : Factor w/ 788 levels "-","49.76 ","51.16 ",..: 232 295 314 631 592 701 364 225 327 521 ...
## $ PH_RAIN : num 0 0 0 0 0 0 0 4.31 4.4 4.42 ...
temp = as.matrix(data2$SO2)
NAAA=which(temp == '-');NAAA
## [1] 762
data2 = data2[-NAAA,]
data2$SO2= as.numeric(as.matrix(data2$SO2))
temp1 = as.matrix(data2$NOx)
NAAA=which(temp1 == '-');NAAA
## [1] 365 431 434 448 449 491 587 588 589 590 597 764 825 826
data2 = data2[-NAAA,]
data2$CO = as.numeric(as.matrix(data2$CO))
data2$O3 = as.numeric(as.matrix(data2$O3))
temp1=as.matrix(data2$PM.10)
NAAA1=which((temp1 == '-'))
data2 = data2[-NAAA1,]
data2$PM.10 = as.numeric(as.matrix(data2$PM.10))
temp2 = as.matrix(data2$PM.2.5)
NAAA2=which(temp2 == '-');NAAA2
## [1] 112 113 114 115 116 117 118 119 120 121 390 391 392 393 394 395 396
## [18] 397 398 399 400 401 402 403 404 405 406 674 675
data2 = data2[-NAAA2,]
data2$PM.2.5 = as.numeric(as.matrix(data2$PM.2.5))
data2$NOx = as.numeric(as.matrix(data2$NOx))
data2$NO = as.numeric(as.matrix(data2$NO))
data2$NO2 = as.numeric(as.matrix(data2$NO2))
data2$WS_HR = as.numeric(as.matrix(data2$WS_HR))
data2$AMB_TEMP = as.numeric(as.matrix(data2$AMB_TEMP))
data2$PH_RAIN = as.numeric(as.matrix(data2$PH_RAIN))
data2$RH = as.numeric(as.matrix(data2$RH))
str(data2)
## 'data.frame': 864 obs. of 12 variables:
## $ SO2 : num 2.5 2.7 2 2.1 2.4 2.2 0.9 1.6 2 2.1 ...
## $ CO : num 0.19 0.16 0.16 0.38 0.2 0.29 0.26 0.19 0.22 0.23 ...
## $ O3 : num 25.2 22.7 25.8 27.3 24.7 32.2 48.1 49.5 53.9 48.6 ...
## $ PM.10 : num 45 36 35 44 41 33 37 31 37 26 ...
## $ PM.2.5 : num 21 15 15 23 24 16 15 14 21 16 ...
## $ NOx : num 7 6 5 8 11 9 5 5 6 5 ...
## $ NO : num 1.65 0.93 1.26 2.01 2.4 2.98 1.04 1 1.42 0.87 ...
## $ NO2 : num 5.5 5.18 4.2 5.87 8.25 5.79 3.43 4.12 4.49 3.61 ...
## $ WS_HR : num 5.73 7.31 6.8 3.34 1.62 3.64 6.07 5.25 2.99 3.39 ...
## $ AMB_TEMP: num 23.2 22.2 22.7 19.5 21.5 ...
## $ RH : num 78.8 81.2 82 90.2 89 ...
## $ PH_RAIN : num 0 0 0 0 0 0 0 4.31 4.4 4.42 ...
temp3 = as.matrix(data2$PH_RAIN)
NO_RAIN=which(temp3 == '0');NO_RAIN
## [1] 1 2 3 4 5 6 7 12 13 14 15 16 17 19 22 24 25
## [18] 26 27 29 30 33 34 35 36 37 38 39 40 41 42 43 44 47
## [35] 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 67 68
## [52] 70 71 72 75 76 77 79 80 81 82 84 91 93 94 96 97 98
## [69] 99 100 101 102 103 104 105 108 110 111 112 113 116 117 118 119 120
## [86] 121 122 123 124 125 128 129 130 131 132 135 137 138 139 140 144 145
## [103] 147 150 151 152 153 154 155 156 157 159 160 161 162 163 164 165 167
## [120] 168 169 172 175 176 179 180 181 182 183 184 190 191 193 194 195 196
## [137] 197 200 201 202 205 206 207 208 209 211 215 220 223 224 225 226 227
## [154] 228 229 230 231 233 234 237 238 239 240 241 242 243 246 247 249 250
## [171] 251 252 254 255 258 261 262 266 269 271 272 273 274 275 276 277 279
## [188] 280 283 284 285 289 297 298 299 305 306 307 308 310 311 312 313 314
## [205] 315 317 318 319 320 323 324 325 329 330 331 332 333 334 337 340 343
## [222] 344 345 346 347 351 355 356 357 358 359 360 361 362 363 364 365 367
## [239] 368 369 370 371 372 373 377 380 381 382 383 384 385 386 387 391 392
## [256] 393 394 395 396 397 398 399 400 401 402 404 405 406 407 409 410 411
## [273] 413 417 420 422 423 424 425 426 427 428 429 430 433 434 435 436 437
## [290] 438 442 445 446 447 448 449 453 455 457 458 459 460 461 462 463 464
## [307] 465 466 467 468 469 470 471 474 475 477 478 479 480 481 482 487 488
## [324] 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507
## [341] 508 511 512 513 514 516 517 518 519 520 521 522 523 524 525 526 527
## [358] 528 529 530 533 534 535 540 541 542 543 544 545 546 547 548 549 550
## [375] 551 552 553 554 555 556 557 558 559 560 561 562 563 564 566 571 572
## [392] 573 574 575 576 577 580 581 582 584 588 590 593 594 597 599 600 601
## [409] 603 606 607 608 609 610 612 613 617 618 619 620 627 628 632 633 634
## [426] 635 636 637 638 639 641 643 644 646 648 649 650 651 652 653 654 656
## [443] 657 658 659 660 661 662 664 665 666 667 668 670 671 672 673 674 684
## [460] 685 686 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705
## [477] 706 707 708 709 710 711 712 713 714 715 716 718 719 720 721 722 723
## [494] 724 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741
## [511] 742 743 744 745 746 747 748 749 750 751 752 754 755 756 757 758 759
## [528] 760 761 762 763 764 766 767 768 769 770 771 775 776 777 779 780 781
## [545] 782 783 785 786 789 790 792 793 794 795 796 797 799 800 801 802 803
## [562] 804 806 807 808 809 810 812 813 814 815 816 817 819 821 824 827 829
## [579] 830 831 832 834 835 836 837 838 839 840 841 842 843 844 845 846 850
## [596] 851 852 853 854 855 856 857 858 859 860 861 862 863 864
data3 = data2[-NO_RAIN,]
data3$PH_RAIN<-ifelse(data3$PH_RAIN<5,1,0)
# 挑出70% 的資料進行建模 #
n <- nrow(data3)
set.seed(106354012)
t_idx <- sample(seq_len(n), size = round(0.7 * n))
data_train <- data3[t_idx,] %>% as.data.frame()
data_test <- data3[ - t_idx,] %>% as.data.frame()
#開始建模
model<-glm(PH_RAIN~.,family = binomial("logit"),data=data_train)
summary(model)
##
## Call:
## glm(formula = PH_RAIN ~ ., family = binomial("logit"), data = data_train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.3930 -0.7803 0.5178 0.7803 2.0157
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.78897 3.92658 -0.965 0.33457
## SO2 0.15580 0.39294 0.396 0.69175
## CO -0.42058 2.33721 -0.180 0.85719
## O3 0.06353 0.02275 2.793 0.00522 **
## PM.10 -0.04845 0.03351 -1.446 0.14822
## PM.2.5 0.11091 0.05181 2.141 0.03231 *
## NOx 0.00898 0.65526 0.014 0.98907
## NO 0.01581 0.69675 0.023 0.98190
## NO2 0.08808 0.66027 0.133 0.89387
## WS_HR 0.30765 0.19097 1.611 0.10718
## AMB_TEMP -0.11967 0.05005 -2.391 0.01680 *
## RH 0.02548 0.03629 0.702 0.48259
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 227.51 on 177 degrees of freedom
## Residual deviance: 179.92 on 166 degrees of freedom
## AIC: 203.92
##
## Number of Fisher Scoring iterations: 4
#
step(model,direction = "both")
## Start: AIC=203.92
## PH_RAIN ~ SO2 + CO + O3 + PM.10 + PM.2.5 + NOx + NO + NO2 + WS_HR +
## AMB_TEMP + RH
##
## Df Deviance AIC
## - NOx 1 179.92 201.92
## - NO 1 179.92 201.92
## - NO2 1 179.94 201.94
## - CO 1 179.95 201.95
## - SO2 1 180.08 202.08
## - RH 1 180.41 202.41
## <none> 179.92 203.92
## - PM.10 1 182.04 204.04
## - WS_HR 1 182.64 204.64
## - PM.2.5 1 184.81 206.81
## - AMB_TEMP 1 186.13 208.13
## - O3 1 188.64 210.64
##
## Step: AIC=201.92
## PH_RAIN ~ SO2 + CO + O3 + PM.10 + PM.2.5 + NO + NO2 + WS_HR +
## AMB_TEMP + RH
##
## Df Deviance AIC
## - NO 1 179.93 199.93
## - CO 1 179.95 199.95
## - SO2 1 180.08 200.08
## - RH 1 180.41 200.41
## - NO2 1 180.56 200.56
## <none> 179.92 201.92
## - PM.10 1 182.04 202.04
## - WS_HR 1 182.64 202.64
## + NOx 1 179.92 203.92
## - PM.2.5 1 184.84 204.84
## - AMB_TEMP 1 186.13 206.13
## - O3 1 188.67 208.67
##
## Step: AIC=199.93
## PH_RAIN ~ SO2 + CO + O3 + PM.10 + PM.2.5 + NO2 + WS_HR + AMB_TEMP +
## RH
##
## Df Deviance AIC
## - CO 1 179.96 197.96
## - SO2 1 180.08 198.08
## - RH 1 180.42 198.42
## - NO2 1 180.68 198.68
## <none> 179.93 199.93
## - PM.10 1 182.34 200.34
## - WS_HR 1 182.69 200.69
## + NO 1 179.92 201.92
## + NOx 1 179.92 201.92
## - PM.2.5 1 185.19 203.19
## - AMB_TEMP 1 186.45 204.45
## - O3 1 188.75 206.75
##
## Step: AIC=197.96
## PH_RAIN ~ SO2 + O3 + PM.10 + PM.2.5 + NO2 + WS_HR + AMB_TEMP +
## RH
##
## Df Deviance AIC
## - SO2 1 180.15 196.15
## - RH 1 180.44 196.44
## - NO2 1 180.70 196.70
## <none> 179.96 197.96
## - PM.10 1 182.38 198.38
## - WS_HR 1 182.83 198.83
## + CO 1 179.93 199.93
## + NO 1 179.95 199.95
## + NOx 1 179.95 199.95
## - PM.2.5 1 185.29 201.29
## - AMB_TEMP 1 187.38 203.38
## - O3 1 188.75 204.75
##
## Step: AIC=196.15
## PH_RAIN ~ O3 + PM.10 + PM.2.5 + NO2 + WS_HR + AMB_TEMP + RH
##
## Df Deviance AIC
## - RH 1 180.54 194.54
## - NO2 1 181.19 195.19
## <none> 180.15 196.15
## - PM.10 1 182.47 196.47
## - WS_HR 1 182.93 196.93
## + SO2 1 179.96 197.96
## + CO 1 180.08 198.08
## + NO 1 180.15 198.15
## + NOx 1 180.15 198.15
## - PM.2.5 1 185.79 199.79
## - AMB_TEMP 1 187.85 201.85
## - O3 1 188.84 202.84
##
## Step: AIC=194.54
## PH_RAIN ~ O3 + PM.10 + PM.2.5 + NO2 + WS_HR + AMB_TEMP
##
## Df Deviance AIC
## - NO2 1 181.93 193.93
## <none> 180.54 194.54
## - PM.10 1 183.08 195.08
## - WS_HR 1 183.16 195.16
## + RH 1 180.15 196.15
## + SO2 1 180.44 196.44
## + CO 1 180.49 196.49
## + NOx 1 180.54 196.54
## + NO 1 180.54 196.54
## - PM.2.5 1 185.90 197.90
## - AMB_TEMP 1 187.89 199.89
## - O3 1 189.12 201.12
##
## Step: AIC=193.93
## PH_RAIN ~ O3 + PM.10 + PM.2.5 + WS_HR + AMB_TEMP
##
## Df Deviance AIC
## - WS_HR 1 183.22 193.22
## - PM.10 1 183.54 193.54
## <none> 181.93 193.93
## + NO2 1 180.54 194.54
## + NOx 1 180.66 194.66
## + RH 1 181.19 195.19
## + SO2 1 181.59 195.59
## + NO 1 181.77 195.77
## + CO 1 181.91 195.91
## - PM.2.5 1 187.08 197.08
## - O3 1 189.57 199.57
## - AMB_TEMP 1 193.18 203.18
##
## Step: AIC=193.22
## PH_RAIN ~ O3 + PM.10 + PM.2.5 + AMB_TEMP
##
## Df Deviance AIC
## - PM.10 1 183.93 191.93
## <none> 183.22 193.22
## + WS_HR 1 181.93 193.93
## + RH 1 182.93 194.93
## - PM.2.5 1 187.08 195.08
## + SO2 1 183.14 195.14
## + CO 1 183.15 195.15
## + NO2 1 183.16 195.16
## + NOx 1 183.19 195.19
## + NO 1 183.20 195.20
## - O3 1 190.10 198.10
## - AMB_TEMP 1 195.57 203.57
##
## Step: AIC=191.93
## PH_RAIN ~ O3 + PM.2.5 + AMB_TEMP
##
## Df Deviance AIC
## <none> 183.93 191.93
## + PM.10 1 183.22 193.22
## + RH 1 183.49 193.49
## + WS_HR 1 183.54 193.54
## + NOx 1 183.86 193.86
## + NO2 1 183.87 193.87
## + SO2 1 183.88 193.88
## + NO 1 183.88 193.88
## + CO 1 183.90 193.90
## - PM.2.5 1 189.28 195.28
## - O3 1 191.18 197.18
## - AMB_TEMP 1 202.45 208.45
##
## Call: glm(formula = PH_RAIN ~ O3 + PM.2.5 + AMB_TEMP, family = binomial("logit"),
## data = data_train)
##
## Coefficients:
## (Intercept) O3 PM.2.5 AMB_TEMP
## 0.89075 0.04475 0.05050 -0.13787
##
## Degrees of Freedom: 177 Total (i.e. Null); 174 Residual
## Null Deviance: 227.5
## Residual Deviance: 183.9 AIC: 191.9
model2<- glm(formula = PH_RAIN ~ O3 + PM.2.5 + AMB_TEMP, family = binomial("logit"),
data = data_train)
summary(model2)
##
## Call:
## glm(formula = PH_RAIN ~ O3 + PM.2.5 + AMB_TEMP, family = binomial("logit"),
## data = data_train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.6070 -0.8582 0.4918 0.8020 1.8447
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.89075 0.94479 0.943 0.34579
## O3 0.04475 0.01719 2.604 0.00922 **
## PM.2.5 0.05050 0.02294 2.201 0.02774 *
## AMB_TEMP -0.13787 0.03432 -4.017 5.91e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 227.51 on 177 degrees of freedom
## Residual deviance: 183.93 on 174 degrees of freedom
## AIC: 191.93
##
## Number of Fisher Scoring iterations: 4
library(car)
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
vif(model2)
## O3 PM.2.5 AMB_TEMP
## 1.107959 1.095418 1.014624
anova(model,test = "Chisq")
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: PH_RAIN
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 177 227.51
## SO2 1 0.0668 176 227.44 0.7960752
## CO 1 16.7205 175 210.72 4.331e-05 ***
## O3 1 12.2587 174 198.47 0.0004631 ***
## PM.10 1 0.0368 173 198.43 0.8479026
## PM.2.5 1 6.0074 172 192.42 0.0142459 *
## NOx 1 0.0239 171 192.40 0.8770913
## NO 1 1.4858 170 190.91 0.2228634
## NO2 1 0.0080 169 190.90 0.9286556
## WS_HR 1 4.7593 168 186.15 0.0291397 *
## AMB_TEMP 1 5.7373 167 180.41 0.0166084 *
## RH 1 0.4894 166 179.92 0.4842123
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model3<-glm(formula = PH_RAIN ~ CO + O3 + AMB_TEMP,family = binomial("logit"), data = data_train )
summary(model3)
##
## Call:
## glm(formula = PH_RAIN ~ CO + O3 + AMB_TEMP, family = binomial("logit"),
## data = data_train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.2136 -0.8762 0.5149 0.8086 1.7502
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.49739 1.28188 0.388 0.698004
## CO 1.72859 1.81168 0.954 0.340013
## O3 0.05614 0.01664 3.373 0.000743 ***
## AMB_TEMP -0.12347 0.04068 -3.035 0.002407 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 227.51 on 177 degrees of freedom
## Residual deviance: 188.32 on 174 degrees of freedom
## AIC: 196.32
##
## Number of Fisher Scoring iterations: 4
anova(model3,test = "Chisq")
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: PH_RAIN
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 177 227.51
## CO 1 16.787 176 210.72 4.181e-05 ***
## O3 1 12.258 175 198.47 0.0004633 ***
## AMB_TEMP 1 10.143 174 188.32 0.0014483 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
vif(model3)
## CO O3 AMB_TEMP
## 1.466196 1.015427 1.475001
outlier.test(model)
## Warning: 'outlier.test' is deprecated.
## Use 'outlierTest' instead.
## See help("Deprecated") and help("car-deprecated").
##
## No Studentized residuals with Bonferonni p < 0.05
## Largest |rstudent|:
## rstudent unadjusted p-value Bonferonni p
## 279 -2.571514 0.010126 NA
attach(data_train)
## The following objects are masked from data2:
##
## AMB_TEMP, CO, NO, NO2, NOx, O3, PH_RAIN, PM.10, PM.2.5, RH,
## SO2, WS_HR
#
model4<-glm(PH_RAIN ~ (CO + O3 + AMB_TEMP):(CO + O3 + AMB_TEMP):(CO + O3 + AMB_TEMP),
family = binomial(link="logit"),data=data_train)
summary(model4)
##
## Call:
## glm(formula = PH_RAIN ~ (CO + O3 + AMB_TEMP):(CO + O3 + AMB_TEMP):(CO +
## O3 + AMB_TEMP), family = binomial(link = "logit"), data = data_train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.1953 -0.9270 0.5136 0.8316 1.7895
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.925875 7.943214 0.368 0.713
## CO -14.930833 21.561425 -0.692 0.489
## O3 -0.049172 0.226904 -0.217 0.828
## AMB_TEMP -0.271011 0.325803 -0.832 0.406
## CO:O3 0.567679 0.673701 0.843 0.399
## CO:AMB_TEMP 0.983929 0.986909 0.997 0.319
## O3:AMB_TEMP 0.005609 0.009576 0.586 0.558
## CO:O3:AMB_TEMP -0.030848 0.029773 -1.036 0.300
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 227.51 on 177 degrees of freedom
## Residual deviance: 186.52 on 170 degrees of freedom
## AIC: 202.52
##
## Number of Fisher Scoring iterations: 5
step(model4,direction = "both")
## Start: AIC=202.52
## PH_RAIN ~ (CO + O3 + AMB_TEMP):(CO + O3 + AMB_TEMP):(CO + O3 +
## AMB_TEMP)
##
## Df Deviance AIC
## - CO:O3:AMB_TEMP 1 187.61 201.61
## <none> 186.52 202.52
##
## Step: AIC=201.61
## PH_RAIN ~ CO + O3 + AMB_TEMP + CO:O3 + CO:AMB_TEMP + O3:AMB_TEMP
##
## Df Deviance AIC
## - CO:AMB_TEMP 1 187.62 199.62
## - CO:O3 1 188.12 200.12
## - O3:AMB_TEMP 1 188.18 200.18
## <none> 187.61 201.61
## + CO:O3:AMB_TEMP 1 186.52 202.52
##
## Step: AIC=199.61
## PH_RAIN ~ CO + O3 + AMB_TEMP + CO:O3 + O3:AMB_TEMP
##
## Df Deviance AIC
## - CO:O3 1 188.15 198.15
## - O3:AMB_TEMP 1 188.21 198.21
## <none> 187.62 199.62
## + CO:AMB_TEMP 1 187.61 201.61
##
## Step: AIC=198.15
## PH_RAIN ~ CO + O3 + AMB_TEMP + O3:AMB_TEMP
##
## Df Deviance AIC
## - O3:AMB_TEMP 1 188.32 196.32
## - CO 1 189.28 197.28
## <none> 188.15 198.15
## + CO:O3 1 187.62 199.62
## + CO:AMB_TEMP 1 188.12 200.12
##
## Step: AIC=196.32
## PH_RAIN ~ CO + O3 + AMB_TEMP
##
## Df Deviance AIC
## - CO 1 189.28 195.28
## <none> 188.32 196.32
## + O3:AMB_TEMP 1 188.15 198.15
## + CO:O3 1 188.21 198.21
## + CO:AMB_TEMP 1 188.28 198.28
## - AMB_TEMP 1 198.47 204.47
## - O3 1 200.87 206.87
##
## Step: AIC=195.28
## PH_RAIN ~ O3 + AMB_TEMP
##
## Df Deviance AIC
## <none> 189.28 195.28
## + CO 1 188.32 196.32
## + O3:AMB_TEMP 1 189.28 197.28
## - O3 1 203.38 207.38
## - AMB_TEMP 1 211.35 215.35
##
## Call: glm(formula = PH_RAIN ~ O3 + AMB_TEMP, family = binomial(link = "logit"),
## data = data_train)
##
## Coefficients:
## (Intercept) O3 AMB_TEMP
## 1.35691 0.05797 -0.14656
##
## Degrees of Freedom: 177 Total (i.e. Null); 175 Residual
## Null Deviance: 227.5
## Residual Deviance: 189.3 AIC: 195.3
model5<-glm(formula = PH_RAIN ~ O3 + AMB_TEMP, family = binomial(link = "logit"),
data = data_train)
summary(model5)
##
## Call:
## glm(formula = PH_RAIN ~ O3 + AMB_TEMP, family = binomial(link = "logit"),
## data = data_train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.0373 -0.8956 0.5135 0.8134 1.7046
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.35691 0.92432 1.468 0.1421
## O3 0.05797 0.01637 3.540 0.0004 ***
## AMB_TEMP -0.14656 0.03383 -4.332 1.47e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 227.51 on 177 degrees of freedom
## Residual deviance: 189.28 on 175 degrees of freedom
## AIC: 195.28
##
## Number of Fisher Scoring iterations: 4
# 機器學習 #
table(data_train$PH_RAIN)
##
## 0 1
## 60 118
a<-118/178#資料集機率層級
modle_train <- glm(PH_RAIN ~ CO + O3 + AMB_TEMP, family = binomial("logit"),data =data_train)
result <- predict(modle_train, newdata = data_test, type = "response")
result_Approved <- ifelse(result >a, 1, 0)
cm <- table(data_test$PH_RAIN, result_Approved, dnn = c("實際", "預測"))
cm
## 預測
## 實際 0 1
## 0 19 11
## 1 12 35
cm[4] / sum(cm[, 2])
## [1] 0.7608696
cm[1] / sum(cm[, 1])
## [1] 0.6129032
accuracy <- sum(diag(cm)) / sum(cm);accuracy
## [1] 0.7012987
# 機器學習 #
modle_train <- glm(PH_RAIN ~ O3 + PM.2.5 + AMB_TEMP, family = binomial("logit"),data =data_train)
result <- predict(modle_train, newdata = data_test, type = "response")
result_Approved <- ifelse(result > a, 1, 0)
cm <- table(data_test$PH_RAIN, result_Approved, dnn = c("實際", "預測"))
cm
## 預測
## 實際 0 1
## 0 19 11
## 1 14 33
cm[4] / sum(cm[, 2])
## [1] 0.75
cm[1] / sum(cm[, 1])
## [1] 0.5757576
accuracy <- sum(diag(cm)) / sum(cm);accuracy
## [1] 0.6753247
#畫ROC曲線
library("ROCR")
## Loading required package: gplots
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
pred <- prediction(result, data_test$PH_RAIN)
perf <- performance(pred, measure = "tpr", x.measure = "fpr")
#計算AUC
auc <- performance(pred, "auc")
#畫圖
plot(perf, col = rainbow(7), main = "ROC curve", xlab = "1 - Specificity(FPR)", ylab = "Sensitivity(TPR)")
abline(0, 1)
#實際AUC值
text(0.5, 0.5, as.character(auc@y.values[[1]]))

#AVPLOTS
avPlots(model2)

#畫盒鬚圖
# c(1,2),表示建立一個2x2的空間,用來呈現後續的圖
par(mfrow = c(1,1),cex=1.2,font=2,col=4)
boxplot(formula = data_train$SO2 ~ data_train$PH_RAIN , # Y ~ X (代表X和Y軸要放的數值)
data = data_train, # 資料
xlab = "是否為酸雨", # X軸名稱
ylab = "SO2濃度 (ppb)", # Y軸名稱
col = "gray") # 顏色

boxplot(formula = data_train$CO ~ data_train$PH_RAIN , # Y ~ X (代表X和Y軸要放的數值)
data = data_train, # 資料
xlab = "是否為酸雨", # X軸名稱
ylab = "CO濃度 (ppb)", # Y軸名稱
col = "gray") # 顏色

boxplot(formula = data_train$O3 ~ data_train$PH_RAIN , # Y ~ X (代表X和Y軸要放的數值)
data = data_train, # 資料
xlab = "是否為酸雨", # X軸名稱
ylab = "O3濃度 (ppb)", # Y軸名稱
col = "gray") # 顏色

boxplot(formula = data_train$PM.10 ~ data_train$PH_RAIN , # Y ~ X (代表X和Y軸要放的數值)
data = data_train, # 資料
xlab = "是否為酸雨", # X軸名稱
ylab = "PM10濃度 (ppb)", # Y軸名稱
col = "gray") # 顏色

boxplot(formula = data_train$PM.2.5 ~ data_train$PH_RAIN , # Y ~ X (代表X和Y軸要放的數值)
data = data_train, # 資料
xlab = "是否為酸雨", # X軸名稱
ylab = "PM2.5濃度 (ppb)", # Y軸名稱
col = "gray") # 顏色

boxplot(formula = data_train$NOx ~ data_train$PH_RAIN , # Y ~ X (代表X和Y軸要放的數值)
data = data_train, # 資料
xlab = "是否為酸雨", # X軸名稱
ylab = "氮氧化濃度 (ppb)", # Y軸名稱
col = "gray") # 顏色

boxplot(formula = data_train$NO2 ~ data_train$PH_RAIN , # Y ~ X (代表X和Y軸要放的數值)
data = data_train, # 資料
xlab = "是否為酸雨", # X軸名稱
ylab = "二氧化氮濃度 (ppb)", # Y軸名稱
col = "gray") # 顏色

boxplot(formula = data_train$NO ~ data_train$PH_RAIN , # Y ~ X (代表X和Y軸要放的數值)
data = data_train, # 資料
xlab = "是否為酸雨", # X軸名稱
ylab = "一氧化氮濃度 (ppb)", # Y軸名稱
col = "gray") # 顏色

boxplot(formula = data_train$WS_HR ~ data_train$PH_RAIN ,# Y ~ X (代表X和Y軸要放的數值)
data = data_train, # 資料
xlab = "是否為酸雨", # X軸名稱
ylab = "小時風速值(m/sec)", # Y軸名稱
col = "gray") # 顏色

boxplot(formula = data_train$AMB_TEMP ~ data_train$PH_RAIN , # Y ~ X (代表X和Y軸要放的數值)
data = data_train, # 資料
xlab = "是否為酸雨", # X軸名稱
ylab = "攝氏溫度(℃)", # Y軸名稱
col = "gray") # 顏色

boxplot(formula = data_train$RH ~ data_train$PH_RAIN , # Y ~ X (代表X和Y軸要放的數值)
data = data_train, # 資料
xlab = "是否為酸雨", # X軸名稱
ylab = "相對濕度", # Y軸名稱
col = "gray") # 顏色
