1.Instalacja i wczytanie potrzebnych pakietów


  1. Wczytuję potrzebne pakiety oraz funkcje, które pozwolą mi zaimportować potrzebne dane.

2.ROZKLAD DANYCH - STATYSTYKI OPISOWE POKAZUJACE ROZRZUT DANYCH


##        RH              PM10             PM2.5              SO2         
##  Min.   : 26.89   Min.   :  6.403   Min.   :  1.118   Min.   : 0.0000  
##  1st Qu.: 63.51   1st Qu.: 21.585   1st Qu.: 10.414   1st Qu.: 0.9971  
##  Median : 78.42   Median : 29.426   Median : 16.421   Median : 1.7885  
##  Mean   : 75.03   Mean   : 36.996   Mean   : 24.221   Mean   : 3.3057  
##  3rd Qu.: 88.83   3rd Qu.: 43.184   3rd Qu.: 28.723   3rd Qu.: 3.6350  
##  Max.   :100.00   Max.   :246.134   Max.   :233.760   Max.   :58.4080  
##       NO2               NOx                O3        
##  Min.   :  1.102   Min.   :  1.142   Min.   :  0.00  
##  1st Qu.: 11.575   1st Qu.: 13.583   1st Qu.: 29.17  
##  Median : 19.313   Median : 23.130   Median : 50.80  
##  Mean   : 23.899   Mean   : 33.733   Mean   : 52.39  
##  3rd Qu.: 32.019   3rd Qu.: 40.728   3rd Qu.: 72.08  
##  Max.   :114.861   Max.   :672.814   Max.   :153.57


3.HISTOGRAMY DLA PM10 i PM2.5


# Histogram of the PM10 column of `dane`.
hist(dane$PM10)

# Histogram of the PM2.5 column of `dane`.
hist(dane$PM2.5)


4.PELNY WYDRUK WARTOSCI


# Simple regressions: each particulate column explained by the other one.
pm10.lm <- lm(PM10 ~ PM2.5, data = dane)
pm2.5.lm <- lm(PM2.5 ~ PM10, data = dane)

# Multiple regressions: the same pairs with RH and SO2 added as predictors.
pm10.lm2 <- lm(PM10 ~ PM2.5 + RH + SO2, data = dane)
pm2.5.lm2 <- lm(PM2.5 ~ PM10 + RH + SO2, data = dane)



# Full summary of the simple model PM10 ~ PM2.5.
summary(pm10.lm)
## 
## Call:
## lm(formula = PM10 ~ PM2.5, data = dane)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -13.873  -5.908  -2.411   2.835 188.157 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 13.094704   0.223817   58.51   <2e-16 ***
## PM2.5        0.986795   0.006688  147.55   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.92 on 4996 degrees of freedom
## Multiple R-squared:  0.8133, Adjusted R-squared:  0.8133 
## F-statistic: 2.177e+04 on 1 and 4996 DF,  p-value: < 2.2e-16
# Full summary of the simple model PM2.5 ~ PM10.
summary(pm2.5.lm)
## 
## Call:
## lm(formula = PM2.5 ~ PM10, data = dane)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -151.116   -3.731    0.808    4.775   38.696 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -6.272061   0.250269  -25.06   <2e-16 ***
## PM10         0.824229   0.005586  147.55   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.979 on 4996 degrees of freedom
## Multiple R-squared:  0.8133, Adjusted R-squared:  0.8133 
## F-statistic: 2.177e+04 on 1 and 4996 DF,  p-value: < 2.2e-16
# Full summary of the multiple model PM10 ~ PM2.5 + RH + SO2.
summary(pm10.lm2)
## 
## Call:
## lm(formula = PM10 ~ PM2.5 + RH + SO2, data = dane)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -15.906  -5.604  -2.077   2.725 183.942 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 26.31711    0.72632  36.233  < 2e-16 ***
## PM2.5        1.06023    0.01009 105.057  < 2e-16 ***
## RH          -0.19145    0.01004 -19.069  < 2e-16 ***
## SO2         -0.19267    0.04893  -3.938 8.34e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.54 on 4994 degrees of freedom
## Multiple R-squared:  0.8261, Adjusted R-squared:  0.826 
## F-statistic:  7909 on 3 and 4994 DF,  p-value: < 2.2e-16
# Full summary of the multiple model PM2.5 ~ PM10 + RH + SO2.
summary(pm2.5.lm2)
## 
## Call:
## lm(formula = PM2.5 ~ PM10 + RH + SO2, data = dane)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -112.064   -3.531    0.374    4.148   59.649 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -23.420715   0.546092  -42.89   <2e-16 ***
## PM10          0.649370   0.006181  105.06   <2e-16 ***
## RH            0.260559   0.007255   35.91   <2e-16 ***
## SO2           1.230613   0.034171   36.01   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.249 on 4994 degrees of freedom
## Multiple R-squared:  0.8725, Adjusted R-squared:  0.8724 
## F-statistic: 1.139e+04 on 3 and 4994 DF,  p-value: < 2.2e-16
# Coefficient vectors of the four fitted models.
# coef() is the documented accessor; `$coef` only worked through partial
# matching of the `coefficients` element of the lm object.
coef(pm10.lm)
## (Intercept)       PM2.5 
##  13.0947035   0.9867951
coef(pm10.lm2)
## (Intercept)       PM2.5          RH         SO2 
##  26.3171132   1.0602255  -0.1914454  -0.1926680
coef(pm2.5.lm)
## (Intercept)        PM10 
##  -6.2720608   0.8242291
coef(pm2.5.lm2)
## (Intercept)        PM10          RH         SO2 
## -23.4207151   0.6493700   0.2605593   1.2306125
# Coefficient tables (estimate, standard error, t value, p-value) of the four
# models, rounded to three decimals.
# coef(summary(model)) replaces `summary(model)$coef`, which relied on partial
# matching of the `coefficients` element.
# Rounding to three decimals prints the very small p-values as 0.
round(coef(summary(pm10.lm)), 3)
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)   13.095      0.224  58.506        0
## PM2.5          0.987      0.007 147.547        0
round(coef(summary(pm10.lm2)), 3)
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)   26.317      0.726  36.233        0
## PM2.5          1.060      0.010 105.057        0
## RH            -0.191      0.010 -19.069        0
## SO2           -0.193      0.049  -3.938        0
round(coef(summary(pm2.5.lm)), 3)
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)   -6.272      0.250 -25.061        0
## PM10           0.824      0.006 147.547        0
round(coef(summary(pm2.5.lm2)), 3)
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  -23.421      0.546 -42.888        0
## PM10           0.649      0.006 105.057        0
## RH             0.261      0.007  35.915        0
## SO2            1.231      0.034  36.013        0

5.ROZRZUT DANYCH


# Scatter plot of PM10 against RH (RH on the x axis).
with(dane, plot(RH, PM10))

# Scatter plot of PM2.5 against RH (RH on the x axis).
with(dane, plot(RH, PM2.5))


6.BOXPLOTY DLA PARAMETROW PM10 i PM2.5


# Box plots of the PM10 and PM2.5 distributions, one plot per variable.
# The previous qplot() calls mapped one continuous variable to x and the other
# to y, which does not yield a per-variable box plot; qplot() is also
# deprecated in current ggplot2 releases.
ggplot(dane, aes(x = "PM10", y = PM10)) +
  geom_boxplot() +
  labs(x = NULL)

ggplot(dane, aes(x = "PM2.5", y = PM2.5)) +
  geom_boxplot() +
  labs(x = NULL)


7.REGRESJA PROSTA DLA PM10 I PM2.5 - WYKRESY DIAGNOSTYCZNE


# Fit both simple regressions, then draw the lm diagnostic plots for each
# model (PM10 model first, PM2.5 model second).
pm10.lm <- lm(PM10 ~ PM2.5, data = dane)
pm2.5.lm <- lm(PM2.5 ~ PM10, data = dane)

plot(pm10.lm)
plot(pm2.5.lm)


8.REGRESJA WIELOKROTNA DLA PM10 I PM2.5 - WYKRESY DIAGNOSTYCZNE


# Fit both multiple regressions (predictors: the other PM column, RH, SO2),
# then draw the lm diagnostic plots for each model in the same order.
pm10.lm2 <- lm(PM10 ~ PM2.5 + RH + SO2, data = dane)
pm2.5.lm2 <- lm(PM2.5 ~ PM10 + RH + SO2, data = dane)

plot(pm10.lm2)
plot(pm2.5.lm2)


9.MODEL REGRESJI



10.REGRESJA


# Scatter of PM2.5 against PM10 with a linear fit drawn in red.
ggplot(data = dane, mapping = aes(x = PM10, y = PM2.5)) +
  geom_point() +
  stat_smooth(method = "lm", colour = "red")


11.ALGORYTM BORUTA


# Boruta feature selection for PM2.5 using all other columns of `dane`.
# The algorithm is stochastic, so the RNG seed is fixed to make the decision
# table reproducible between runs.
set.seed(123)
algorytmBoruta <- Boruta(PM2.5 ~ ., data = dane)

# Keep every predictor whose final decision is "Confirmed" or "Tentative".
kept <- algorytmBoruta$finalDecision %in% c("Confirmed", "Tentative")
boruta_signif <- names(algorytmBoruta$finalDecision[kept])
print(boruta_signif)
## [1] "RH"   "PM10" "SO2"  "NO2"  "NOx"  "O3"
plot(algorytmBoruta, cex.axis = 0.8, las = 3, xlab = "")


12.WYKRES LINII REGRESJI


# Linear model of PM10 on all remaining columns of `dane`; na.exclude keeps
# fitted values and residuals aligned with the original rows.
dane.lm <- lm(PM10 ~ ., data = dane, na.action = na.exclude)

# PM10 + PM2.5 against RH, coloured by PM2.5, with a linear trend line.
# Built with ggplot() on the raw data frame: qplot() is deprecated, and passing
# the lm object as `data` relied on an implicit fortify() conversion.
# NOTE(review): y = PM10 + PM2.5 is kept from the original call; confirm that
# the sum of both columns is the intended quantity.
ggplot(dane, aes(x = RH, y = PM10 + PM2.5, colour = PM2.5)) +
  geom_point() +
  stat_smooth(method = "lm", se = FALSE, fullrange = TRUE)


13.WYKRES GESTOSCI DLA PM10 I PM2.5


# Density plots of PM10 and PM2.5 with the sample skewness in the subtitle.
# The skewness is computed from the plotted variable; it was previously taken
# from the unrelated built-in `cars$dist` data, so both subtitles were wrong.
dens_pm10 <- density(dane$PM10)
plot(dens_pm10, main = "Density Plot", ylab = "Frequency",
     sub = paste("Skewness:", round(e1071::skewness(dane$PM10), 2)))
polygon(dens_pm10, col = "red")

dens_pm2.5 <- density(dane$PM2.5)
plot(dens_pm2.5, main = "Density Plot", ylab = "Frequency",
     sub = paste("Skewness:", round(e1071::skewness(dane$PM2.5), 2)))
polygon(dens_pm2.5, col = "red")


14.BLAD RESZT


# Residuals of the simple regression PM2.5 ~ PM10: each red segment joins an
# observation to its fitted value on the regression line.
# The fitted values must come from the same model that stat_smooth() draws.
# Previously `.fitted` came from dane.lm (PM10 ~ .), so a predicted PM10 value
# was used as a PM2.5 coordinate.
resid_fit <- lm(PM2.5 ~ PM10, data = dane, na.action = na.exclude)
resid_data <- data.frame(
  PM10 = dane$PM10,
  PM2.5 = dane$PM2.5,
  .fitted = fitted(resid_fit)  # na.exclude pads to the full row count
)

ggplot(resid_data, aes(PM10, PM2.5)) +
  geom_point() +
  stat_smooth(method = "lm", se = FALSE) +
  geom_segment(aes(xend = PM10, yend = .fitted), color = "red", size = 0.3)


15.WZGLEDNA WAZNOSC ZMIENNYCH - PAKIET “relaimpo”


# Bootstrap (100 replicates) of four relative-importance metrics, followed by
# the evaluation printout with confidence intervals.
# NOTE(review): `dane` is passed as a plain data frame and the printed result
# reports RH as the response variable; confirm that RH, rather than PM10 or
# PM2.5, is the intended response.
boot <- boot.relimp(
  dane,
  b = 100,
  type = c("lmg", "last", "first", "pratt"),
  rank = TRUE,
  diff = TRUE,
  rela = TRUE
)
booteval.relimp(boot)
## Response variable: RH 
## Total response variance: 273.9855 
## Analysis based on 4998 observations 
## 
## 6 Regressors: 
## PM10 PM2.5 SO2 NO2 NOx O3 
## Proportion of variance explained by model: 63.67%
## Metrics are normalized to sum to 100% (rela=TRUE). 
## 
## Relative importance metrics: 
## 
##              lmg         last      first        pratt
## PM10  0.05123893 0.0416508437 0.05057687 -0.118760025
## PM2.5 0.12648515 0.0976416352 0.14043369  0.301935118
## SO2   0.05222588 0.1039970421 0.00235632 -0.021062690
## NO2   0.06532700 0.0217320035 0.11650886 -0.098647843
## NOx   0.03207646 0.0002467624 0.08060646 -0.008042537
## O3    0.67264658 0.7347317131 0.60951780  0.944577976
## 
## Average coefficients for different model sizes: 
## 
##               1X         2Xs          3Xs         4Xs         5Xs          6Xs
## PM10   0.1352692 -0.04776183 -0.176063584 -0.23749443 -0.25096630 -0.239828155
## PM2.5  0.2466313  0.27850598  0.353683323  0.40806222  0.41975082  0.400380902
## SO2    0.1648046 -0.65390190 -0.850211494 -1.00542676 -1.09229999 -1.112325101
## NO2    0.3161184  0.16509618  0.051393956 -0.04759372 -0.13315472 -0.202097682
## NOx    0.1200584  0.03438370 -0.006552206 -0.02194098 -0.02050362 -0.009044792
## O3    -0.3768641 -0.43306026 -0.453126682 -0.46018885 -0.45521976 -0.440980223
## 
##  
##  Confidence interval information ( 100 bootstrap replicates, bty= perc ): 
## Relative Contributions with confidence intervals: 
##  
##                               Lower  Upper
##             percentage 0.95   0.95    0.95   
## PM10.lmg     0.0512    ___DE_  0.0454  0.0562
## PM2.5.lmg    0.1265    _B____  0.1177  0.1367
## SO2.lmg      0.0522    ___DE_  0.0460  0.0597
## NO2.lmg      0.0653    __C___  0.0600  0.0711
## NOx.lmg      0.0321    _____F  0.0275  0.0359
## O3.lmg       0.6726    A_____  0.6550  0.6865
##                                              
## PM10.last    0.0417    ___D__  0.0297  0.0535
## PM2.5.last   0.0976    _BC___  0.0827  0.1148
## SO2.last     0.1040    _BC___  0.0896  0.1174
## NO2.last     0.0217    ____E_  0.0151  0.0313
## NOx.last     0.0002    _____F  0.0000  0.0014
## O3.last      0.7347    A_____  0.7060  0.7640
##                                              
## PM10.first   0.0506    ____E_  0.0440  0.0606
## PM2.5.first  0.1404    _B____  0.1318  0.1511
## SO2.first    0.0024    _____F  0.0006  0.0049
## NO2.first    0.1165    __C___  0.1037  0.1268
## NOx.first    0.0806    ___D__  0.0729  0.0869
## O3.first     0.6095    A_____  0.5874  0.6301
##                                              
## PM10.pratt  -0.1188    ____EF -0.1483 -0.0933
## PM2.5.pratt  0.3019    _B____  0.2639  0.3450
## SO2.pratt   -0.0211    __CD__ -0.0307 -0.0104
## NO2.pratt   -0.0986    ____EF -0.1130 -0.0846
## NOx.pratt   -0.0080    __CD__ -0.0212  0.0041
## O3.pratt     0.9446    A_____  0.9197  0.9694
## 
## Letters indicate the ranks covered by bootstrap CIs. 
## (Rank bootstrap confidence intervals always obtained by percentile method) 
## CAUTION: Bootstrap confidence intervals can be somewhat liberal. 
## 
##  
##  Differences between Relative Contributions: 
##  
##                                  Lower   Upper
##                  difference 0.95 0.95    0.95   
## PM10-PM2.5.lmg   -0.0752     *   -0.0846 -0.0665
## PM10-SO2.lmg     -0.0010         -0.0100  0.0071
## PM10-NO2.lmg     -0.0141     *   -0.0211 -0.0063
## PM10-NOx.lmg      0.0192     *    0.0117  0.0260
## PM10-O3.lmg      -0.6214     *   -0.6413 -0.5998
## PM2.5-SO2.lmg     0.0743     *    0.0641  0.0833
## PM2.5-NO2.lmg     0.0612     *    0.0480  0.0736
## PM2.5-NOx.lmg     0.0944     *    0.0830  0.1062
## PM2.5-O3.lmg     -0.5462     *   -0.5672 -0.5212
## SO2-NO2.lmg      -0.0131     *   -0.0233 -0.0015
## SO2-NOx.lmg       0.0201     *    0.0108  0.0296
## SO2-O3.lmg       -0.6204     *   -0.6381 -0.5992
## NO2-NOx.lmg       0.0333     *    0.0259  0.0398
## NO2-O3.lmg       -0.6073     *   -0.6240 -0.5873
## NOx-O3.lmg       -0.6406     *   -0.6529 -0.6230
##                                                 
## PM10-PM2.5.last  -0.0560     *   -0.0684 -0.0451
## PM10-SO2.last    -0.0623     *   -0.0797 -0.0442
## PM10-NO2.last     0.0199     *    0.0040  0.0351
## PM10-NOx.last     0.0414     *    0.0295  0.0533
## PM10-O3.last     -0.6931     *   -0.7315 -0.6558
## PM2.5-SO2.last   -0.0064         -0.0188  0.0081
## PM2.5-NO2.last    0.0759     *    0.0581  0.0986
## PM2.5-NOx.last    0.0974     *    0.0822  0.1142
## PM2.5-O3.last    -0.6371     *   -0.6819 -0.5932
## SO2-NO2.last      0.0823     *    0.0658  0.0993
## SO2-NOx.last      0.1038     *    0.0893  0.1171
## SO2-O3.last      -0.6307     *   -0.6671 -0.5914
## NO2-NOx.last      0.0215     *    0.0139  0.0313
## NO2-O3.last      -0.7130     *   -0.7425 -0.6845
## NOx-O3.last      -0.7345     *   -0.7635 -0.7058
##                                                 
## PM10-PM2.5.first -0.0899     *   -0.0993 -0.0800
## PM10-SO2.first    0.0482     *    0.0416  0.0576
## PM10-NO2.first   -0.0659     *   -0.0738 -0.0543
## PM10-NOx.first   -0.0300     *   -0.0404 -0.0196
## PM10-O3.first    -0.5589     *   -0.5849 -0.5278
## PM2.5-SO2.first   0.1381     *    0.1283  0.1481
## PM2.5-NO2.first   0.0239     *    0.0078  0.0400
## PM2.5-NOx.first   0.0598     *    0.0470  0.0732
## PM2.5-O3.first   -0.4691     *   -0.4973 -0.4370
## SO2-NO2.first    -0.1142     *   -0.1237 -0.1019
## SO2-NOx.first    -0.0783     *   -0.0845 -0.0706
## SO2-O3.first     -0.6072     *   -0.6291 -0.5840
## NO2-NOx.first     0.0359     *    0.0266  0.0444
## NO2-O3.first     -0.4930     *   -0.5185 -0.4635
## NOx-O3.first     -0.5289     *   -0.5540 -0.5050
##                                                 
## PM10-PM2.5.pratt -0.4207     *   -0.4929 -0.3603
## PM10-SO2.pratt   -0.0977     *   -0.1213 -0.0718
## PM10-NO2.pratt   -0.0201         -0.0530  0.0106
## PM10-NOx.pratt   -0.1107     *   -0.1431 -0.0770
## PM10-O3.pratt    -1.0633     *   -1.0976 -1.0381
## PM2.5-SO2.pratt   0.3230     *    0.2846  0.3730
## PM2.5-NO2.pratt   0.4006     *    0.3645  0.4434
## PM2.5-NOx.pratt   0.3100     *    0.2727  0.3539
## PM2.5-O3.pratt   -0.6426     *   -0.7002 -0.5868
## SO2-NO2.pratt     0.0776     *    0.0596  0.0945
## SO2-NOx.pratt    -0.0130         -0.0242  0.0046
## SO2-O3.pratt     -0.9656     *   -0.9916 -0.9367
## NO2-NOx.pratt    -0.0906     *   -0.1139 -0.0667
## NO2-O3.pratt     -1.0432     *   -1.0765 -1.0066
## NOx-O3.pratt     -0.9526     *   -0.9777 -0.9232
## 
## * indicates that CI for difference does not include 0. 
## CAUTION: Bootstrap confidence intervals can be somewhat liberal.
# Plot the bootstrap evaluation with sort = TRUE.
plot(booteval.relimp(boot, sort = TRUE))


16.MODEL REGRESJI WIELOKROTNEJ


# Model behind the two reference lines: PM2.5 explained by PM10 and RH.
# `fit1` was used by the helpers below but is not defined anywhere in this part
# of the script, so it is fitted here.
fit1 <- lm(PM2.5 ~ PM10 + RH, data = dane)

# RH is continuous, so adding its raw coefficient as a constant offset (as the
# helpers did before) only describes RH = 1. Each line is instead the fitted
# PM2.5 ~ PM10 relation at a fixed RH level: the first and third quartile.
rh_levels <- quantile(dane$RH, probs = c(0.25, 0.75), na.rm = TRUE,
                      names = FALSE)

# Fitted PM2.5 as a function of PM10 at the lower RH level.
equation1 <- function(x) {
  coef(fit1)[[1]] + coef(fit1)[[2]] * x + coef(fit1)[[3]] * rh_levels[1]
}

# Fitted PM2.5 as a function of PM10 at the upper RH level.
equation2 <- function(x) {
  coef(fit1)[[1]] + coef(fit1)[[2]] * x + coef(fit1)[[3]] * rh_levels[2]
}

ggplot(dane, aes(y = PM2.5, x = PM10, color = RH)) +
  geom_point() +
  stat_function(fun = equation1, geom = "line",
                color = scales::hue_pal()(2)[1]) +
  stat_function(fun = equation2, geom = "line",
                color = scales::hue_pal()(2)[2])


17.WARTOSCI ODSTAJACE


# Quantile-comparison plot for the dane.lm model; the call also prints the
# row indices shown below.
qqPlot(dane.lm, main = "QQ Plot")

## [1] 1988 2138
# Leverage plots for the dane.lm model.
leveragePlots(dane.lm)